LLVM 22.0.0git
ExpandMemCmp.cpp
Go to the documentation of this file.
1//===--- ExpandMemCmp.cpp - Expand memcmp() to load/stores ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass tries to expand memcmp() calls into optimally-sized loads and
10// compares for the target.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/Statistic.h"
25#include "llvm/IR/Dominators.h"
26#include "llvm/IR/IRBuilder.h"
33#include <optional>
34
35using namespace llvm;
36using namespace llvm::PatternMatch;
37
38namespace llvm {
39class TargetLowering;
40}
41
42#define DEBUG_TYPE "expand-memcmp"
43
44STATISTIC(NumMemCmpCalls, "Number of memcmp calls");
45STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size");
46STATISTIC(NumMemCmpGreaterThanMax,
47 "Number of memcmp calls with size greater than max size");
48STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls");
49
51 "memcmp-num-loads-per-block", cl::Hidden, cl::init(1),
52 cl::desc("The number of loads per basic block for inline expansion of "
53 "memcmp that is only being compared against zero."));
54
56 "max-loads-per-memcmp", cl::Hidden,
57 cl::desc("Set maximum number of loads used in expanded memcmp"));
58
60 "max-loads-per-memcmp-opt-size", cl::Hidden,
61 cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"));
62
63namespace {
64
65
66// This class provides helper functions to expand a memcmp library call into an
67// inline expansion.
68class MemCmpExpansion {
69 struct ResultBlock {
70 BasicBlock *BB = nullptr;
71 PHINode *PhiSrc1 = nullptr;
72 PHINode *PhiSrc2 = nullptr;
73
74 ResultBlock() = default;
75 };
76
77 CallInst *const CI = nullptr;
78 ResultBlock ResBlock;
79 const uint64_t Size;
80 unsigned MaxLoadSize = 0;
81 uint64_t NumLoadsNonOneByte = 0;
82 const uint64_t NumLoadsPerBlockForZeroCmp;
83 std::vector<BasicBlock *> LoadCmpBlocks;
84 BasicBlock *EndBlock = nullptr;
85 PHINode *PhiRes = nullptr;
86 const bool IsUsedForZeroCmp;
87 const DataLayout &DL;
88 DomTreeUpdater *DTU = nullptr;
89 IRBuilder<> Builder;
90 // Represents the decomposition in blocks of the expansion. For example,
91 // comparing 33 bytes on X86+sse can be done with 2x16-byte loads and
92 // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {1, 32}.
93 struct LoadEntry {
94 LoadEntry(unsigned LoadSize, uint64_t Offset)
95 : LoadSize(LoadSize), Offset(Offset) {
96 }
97
98 // The size of the load for this block, in bytes.
99 unsigned LoadSize;
100 // The offset of this load from the base pointer, in bytes.
101 uint64_t Offset;
102 };
103 using LoadEntryVector = SmallVector<LoadEntry, 8>;
104 LoadEntryVector LoadSequence;
105
106 void createLoadCmpBlocks();
107 void createResultBlock();
108 void setupResultBlockPHINodes();
109 void setupEndBlockPHINodes();
110 Value *getCompareLoadPairs(unsigned BlockIndex, unsigned &LoadIndex);
111 void emitLoadCompareBlock(unsigned BlockIndex);
112 void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
113 unsigned &LoadIndex);
114 void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes);
115 void emitMemCmpResultBlock();
116 Value *getMemCmpExpansionZeroCase();
117 Value *getMemCmpEqZeroOneBlock();
118 Value *getMemCmpOneBlock();
119 struct LoadPair {
120 Value *Lhs = nullptr;
121 Value *Rhs = nullptr;
122 };
123 LoadPair getLoadPair(Type *LoadSizeType, Type *BSwapSizeType,
124 Type *CmpSizeType, unsigned OffsetBytes);
125
126 static LoadEntryVector
127 computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
128 unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte);
129 static LoadEntryVector
130 computeOverlappingLoadSequence(uint64_t Size, unsigned MaxLoadSize,
131 unsigned MaxNumLoads,
132 unsigned &NumLoadsNonOneByte);
133
134 static void optimiseLoadSequence(
135 LoadEntryVector &LoadSequence,
136 const TargetTransformInfo::MemCmpExpansionOptions &Options,
137 bool IsUsedForZeroCmp);
138
139public:
140 MemCmpExpansion(CallInst *CI, uint64_t Size,
141 const TargetTransformInfo::MemCmpExpansionOptions &Options,
142 const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,
143 DomTreeUpdater *DTU);
144
145 unsigned getNumBlocks();
146 uint64_t getNumLoads() const { return LoadSequence.size(); }
147
148 Value *getMemCmpExpansion();
149};
150
151MemCmpExpansion::LoadEntryVector MemCmpExpansion::computeGreedyLoadSequence(
152 uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
153 const unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte) {
154 NumLoadsNonOneByte = 0;
155 LoadEntryVector LoadSequence;
156 uint64_t Offset = 0;
157 while (Size && !LoadSizes.empty()) {
158 const unsigned LoadSize = LoadSizes.front();
159 const uint64_t NumLoadsForThisSize = Size / LoadSize;
160 if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {
161 // Do not expand if the total number of loads is larger than what the
162 // target allows. Note that it's important that we exit before completing
163 // the expansion to avoid using a ton of memory to store the expansion for
164 // large sizes.
165 return {};
166 }
167 if (NumLoadsForThisSize > 0) {
168 for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) {
169 LoadSequence.push_back({LoadSize, Offset});
170 Offset += LoadSize;
171 }
172 if (LoadSize > 1)
173 ++NumLoadsNonOneByte;
174 Size = Size % LoadSize;
175 }
176 LoadSizes = LoadSizes.drop_front();
177 }
178 return LoadSequence;
179}
180
181MemCmpExpansion::LoadEntryVector
182MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size,
183 const unsigned MaxLoadSize,
184 const unsigned MaxNumLoads,
185 unsigned &NumLoadsNonOneByte) {
186 // These are already handled by the greedy approach.
187 if (Size < 2 || MaxLoadSize < 2)
188 return {};
189
190 // We try to do as many non-overlapping loads as possible starting from the
191 // beginning.
192 const uint64_t NumNonOverlappingLoads = Size / MaxLoadSize;
193 assert(NumNonOverlappingLoads && "there must be at least one load");
194 // There remain 0 to (MaxLoadSize - 1) bytes to load, this will be done with
195 // an overlapping load.
196 Size = Size - NumNonOverlappingLoads * MaxLoadSize;
197 // Bail if we do not need an overloapping store, this is already handled by
198 // the greedy approach.
199 if (Size == 0)
200 return {};
201 // Bail if the number of loads (non-overlapping + potential overlapping one)
202 // is larger than the max allowed.
203 if ((NumNonOverlappingLoads + 1) > MaxNumLoads)
204 return {};
205
206 // Add non-overlapping loads.
207 LoadEntryVector LoadSequence;
208 uint64_t Offset = 0;
209 for (uint64_t I = 0; I < NumNonOverlappingLoads; ++I) {
210 LoadSequence.push_back({MaxLoadSize, Offset});
211 Offset += MaxLoadSize;
212 }
213
214 // Add the last overlapping load.
215 assert(Size > 0 && Size < MaxLoadSize && "broken invariant");
216 LoadSequence.push_back({MaxLoadSize, Offset - (MaxLoadSize - Size)});
217 NumLoadsNonOneByte = 1;
218 return LoadSequence;
219}
220
221void MemCmpExpansion::optimiseLoadSequence(
222 LoadEntryVector &LoadSequence,
223 const TargetTransformInfo::MemCmpExpansionOptions &Options,
224 bool IsUsedForZeroCmp) {
225 // This part of code attempts to optimize the LoadSequence by merging allowed
226 // subsequences into single loads of allowed sizes from
227 // `MemCmpExpansionOptions::AllowedTailExpansions`. If it is for zero
228 // comparison or if no allowed tail expansions are specified, we exit early.
229 if (IsUsedForZeroCmp || Options.AllowedTailExpansions.empty())
230 return;
231
232 while (LoadSequence.size() >= 2) {
233 auto Last = LoadSequence[LoadSequence.size() - 1];
234 auto PreLast = LoadSequence[LoadSequence.size() - 2];
235
236 // Exit the loop if the two sequences are not contiguous
237 if (PreLast.Offset + PreLast.LoadSize != Last.Offset)
238 break;
239
240 auto LoadSize = Last.LoadSize + PreLast.LoadSize;
241 if (find(Options.AllowedTailExpansions, LoadSize) ==
242 Options.AllowedTailExpansions.end())
243 break;
244
245 // Remove the last two sequences and replace with the combined sequence
246 LoadSequence.pop_back();
247 LoadSequence.pop_back();
248 LoadSequence.emplace_back(PreLast.Offset, LoadSize);
249 }
250}
251
252// Initialize the basic block structure required for expansion of memcmp call
253// with given maximum load size and memcmp size parameter.
254// This structure includes:
255// 1. A list of load compare blocks - LoadCmpBlocks.
256// 2. An EndBlock, split from original instruction point, which is the block to
257// return from.
258// 3. ResultBlock, block to branch to for early exit when a
259// LoadCmpBlock finds a difference.
260MemCmpExpansion::MemCmpExpansion(
261 CallInst *const CI, uint64_t Size,
262 const TargetTransformInfo::MemCmpExpansionOptions &Options,
263 const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,
264 DomTreeUpdater *DTU)
265 : CI(CI), Size(Size), NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock),
266 IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), DTU(DTU),
267 Builder(CI) {
268 assert(Size > 0 && "zero blocks");
269 // Scale the max size down if the target can load more bytes than we need.
270 llvm::ArrayRef<unsigned> LoadSizes(Options.LoadSizes);
271 while (!LoadSizes.empty() && LoadSizes.front() > Size) {
272 LoadSizes = LoadSizes.drop_front();
273 }
274 assert(!LoadSizes.empty() && "cannot load Size bytes");
275 MaxLoadSize = LoadSizes.front();
276 // Compute the decomposition.
277 unsigned GreedyNumLoadsNonOneByte = 0;
278 LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, Options.MaxNumLoads,
279 GreedyNumLoadsNonOneByte);
280 NumLoadsNonOneByte = GreedyNumLoadsNonOneByte;
281 assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
282 // If we allow overlapping loads and the load sequence is not already optimal,
283 // use overlapping loads.
284 if (Options.AllowOverlappingLoads &&
285 (LoadSequence.empty() || LoadSequence.size() > 2)) {
286 unsigned OverlappingNumLoadsNonOneByte = 0;
287 auto OverlappingLoads = computeOverlappingLoadSequence(
288 Size, MaxLoadSize, Options.MaxNumLoads, OverlappingNumLoadsNonOneByte);
289 if (!OverlappingLoads.empty() &&
290 (LoadSequence.empty() ||
291 OverlappingLoads.size() < LoadSequence.size())) {
292 LoadSequence = OverlappingLoads;
293 NumLoadsNonOneByte = OverlappingNumLoadsNonOneByte;
294 }
295 }
296 assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
297 optimiseLoadSequence(LoadSequence, Options, IsUsedForZeroCmp);
298}
299
300unsigned MemCmpExpansion::getNumBlocks() {
301 if (IsUsedForZeroCmp)
302 return getNumLoads() / NumLoadsPerBlockForZeroCmp +
303 (getNumLoads() % NumLoadsPerBlockForZeroCmp != 0 ? 1 : 0);
304 return getNumLoads();
305}
306
307void MemCmpExpansion::createLoadCmpBlocks() {
308 for (unsigned i = 0; i < getNumBlocks(); i++) {
309 BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb",
310 EndBlock->getParent(), EndBlock);
311 LoadCmpBlocks.push_back(BB);
312 }
313}
314
315void MemCmpExpansion::createResultBlock() {
316 ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block",
317 EndBlock->getParent(), EndBlock);
318}
319
320MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
321 Type *BSwapSizeType,
322 Type *CmpSizeType,
323 unsigned OffsetBytes) {
324 // Get the memory source at offset `OffsetBytes`.
325 Value *LhsSource = CI->getArgOperand(0);
326 Value *RhsSource = CI->getArgOperand(1);
327 Align LhsAlign = LhsSource->getPointerAlignment(DL);
328 Align RhsAlign = RhsSource->getPointerAlignment(DL);
329 if (OffsetBytes > 0) {
330 auto *ByteType = Type::getInt8Ty(CI->getContext());
331 LhsSource = Builder.CreateConstGEP1_64(ByteType, LhsSource, OffsetBytes);
332 RhsSource = Builder.CreateConstGEP1_64(ByteType, RhsSource, OffsetBytes);
333 LhsAlign = commonAlignment(LhsAlign, OffsetBytes);
334 RhsAlign = commonAlignment(RhsAlign, OffsetBytes);
335 }
336
337 // Create a constant or a load from the source.
338 Value *Lhs = nullptr;
339 if (auto *C = dyn_cast<Constant>(LhsSource))
340 Lhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
341 if (!Lhs)
342 Lhs = Builder.CreateAlignedLoad(LoadSizeType, LhsSource, LhsAlign);
343
344 Value *Rhs = nullptr;
345 if (auto *C = dyn_cast<Constant>(RhsSource))
346 Rhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
347 if (!Rhs)
348 Rhs = Builder.CreateAlignedLoad(LoadSizeType, RhsSource, RhsAlign);
349
350 // Zero extend if Byte Swap intrinsic has different type
351 if (BSwapSizeType && LoadSizeType != BSwapSizeType) {
352 Lhs = Builder.CreateZExt(Lhs, BSwapSizeType);
353 Rhs = Builder.CreateZExt(Rhs, BSwapSizeType);
354 }
355
356 // Swap bytes if required.
357 if (BSwapSizeType) {
359 CI->getModule(), Intrinsic::bswap, BSwapSizeType);
360 Lhs = Builder.CreateCall(Bswap, Lhs);
361 Rhs = Builder.CreateCall(Bswap, Rhs);
362 }
363
364 // Zero extend if required.
365 if (CmpSizeType != nullptr && CmpSizeType != Lhs->getType()) {
366 Lhs = Builder.CreateZExt(Lhs, CmpSizeType);
367 Rhs = Builder.CreateZExt(Rhs, CmpSizeType);
368 }
369 return {Lhs, Rhs};
370}
371
372// This function creates the IR instructions for loading and comparing 1 byte.
373// It loads 1 byte from each source of the memcmp parameters with the given
374// GEPIndex. It then subtracts the two loaded values and adds this result to the
375// final phi node for selecting the memcmp result.
376void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
377 unsigned OffsetBytes) {
378 BasicBlock *BB = LoadCmpBlocks[BlockIndex];
379 Builder.SetInsertPoint(BB);
380 const LoadPair Loads =
381 getLoadPair(Type::getInt8Ty(CI->getContext()), nullptr,
382 Type::getInt32Ty(CI->getContext()), OffsetBytes);
383 Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs);
384
385 PhiRes->addIncoming(Diff, BB);
386
387 if (BlockIndex < (LoadCmpBlocks.size() - 1)) {
388 // Early exit branch if difference found to EndBlock. Otherwise, continue to
389 // next LoadCmpBlock,
390 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,
391 ConstantInt::get(Diff->getType(), 0));
392 BranchInst *CmpBr =
393 BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp);
394 Builder.Insert(CmpBr);
395 if (DTU)
396 DTU->applyUpdates(
397 {{DominatorTree::Insert, BB, EndBlock},
398 {DominatorTree::Insert, BB, LoadCmpBlocks[BlockIndex + 1]}});
399 } else {
400 // The last block has an unconditional branch to EndBlock.
401 BranchInst *CmpBr = BranchInst::Create(EndBlock);
402 Builder.Insert(CmpBr);
403 if (DTU)
404 DTU->applyUpdates({{DominatorTree::Insert, BB, EndBlock}});
405 }
406}
407
408/// Generate an equality comparison for one or more pairs of loaded values.
409/// This is used in the case where the memcmp() call is compared equal or not
410/// equal to zero.
411Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
412 unsigned &LoadIndex) {
413 assert(LoadIndex < getNumLoads() &&
414 "getCompareLoadPairs() called with no remaining loads");
415 std::vector<Value *> XorList, OrList;
416 Value *Diff = nullptr;
417
418 const unsigned NumLoads =
419 std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp);
420
421 // For a single-block expansion, start inserting before the memcmp call.
422 if (LoadCmpBlocks.empty())
423 Builder.SetInsertPoint(CI);
424 else
425 Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
426
427 Value *Cmp = nullptr;
428 // If we have multiple loads per block, we need to generate a composite
429 // comparison using xor+or. The type for the combinations is the largest load
430 // type.
431 IntegerType *const MaxLoadType =
432 NumLoads == 1 ? nullptr
433 : IntegerType::get(CI->getContext(), MaxLoadSize * 8);
434
435 for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) {
436 const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex];
437 const LoadPair Loads = getLoadPair(
438 IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8), nullptr,
439 MaxLoadType, CurLoadEntry.Offset);
440
441 if (NumLoads != 1) {
442 // If we have multiple loads per block, we need to generate a composite
443 // comparison using xor+or.
444 Diff = Builder.CreateXor(Loads.Lhs, Loads.Rhs);
445 Diff = Builder.CreateZExt(Diff, MaxLoadType);
446 XorList.push_back(Diff);
447 } else {
448 // If there's only one load per block, we just compare the loaded values.
449 Cmp = Builder.CreateICmpNE(Loads.Lhs, Loads.Rhs);
450 }
451 }
452
453 auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> {
454 std::vector<Value *> OutList;
455 for (unsigned i = 0; i < InList.size() - 1; i = i + 2) {
456 Value *Or = Builder.CreateOr(InList[i], InList[i + 1]);
457 OutList.push_back(Or);
458 }
459 if (InList.size() % 2 != 0)
460 OutList.push_back(InList.back());
461 return OutList;
462 };
463
464 if (!Cmp) {
465 // Pairwise OR the XOR results.
466 OrList = pairWiseOr(XorList);
467
468 // Pairwise OR the OR results until one result left.
469 while (OrList.size() != 1) {
470 OrList = pairWiseOr(OrList);
471 }
472
473 assert(Diff && "Failed to find comparison diff");
474 Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0));
475 }
476
477 return Cmp;
478}
479
480void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
481 unsigned &LoadIndex) {
482 Value *Cmp = getCompareLoadPairs(BlockIndex, LoadIndex);
483
484 BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
485 ? EndBlock
486 : LoadCmpBlocks[BlockIndex + 1];
487 // Early exit branch if difference found to ResultBlock. Otherwise,
488 // continue to next LoadCmpBlock or EndBlock.
489 BasicBlock *BB = Builder.GetInsertBlock();
490 BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
491 Builder.Insert(CmpBr);
492 if (DTU)
493 DTU->applyUpdates({{DominatorTree::Insert, BB, ResBlock.BB},
494 {DominatorTree::Insert, BB, NextBB}});
495
496 // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
497 // since early exit to ResultBlock was not taken (no difference was found in
498 // any of the bytes).
499 if (BlockIndex == LoadCmpBlocks.size() - 1) {
500 Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
501 PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
502 }
503}
504
505// This function creates the IR intructions for loading and comparing using the
506// given LoadSize. It loads the number of bytes specified by LoadSize from each
507// source of the memcmp parameters. It then does a subtract to see if there was
508// a difference in the loaded values. If a difference is found, it branches
509// with an early exit to the ResultBlock for calculating which source was
510// larger. Otherwise, it falls through to the either the next LoadCmpBlock or
511// the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with
512// a special case through emitLoadCompareByteBlock. The special handling can
513// simply subtract the loaded values and add it to the result phi node.
514void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
515 // There is one load per block in this case, BlockIndex == LoadIndex.
516 const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex];
517
518 if (CurLoadEntry.LoadSize == 1) {
519 MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, CurLoadEntry.Offset);
520 return;
521 }
522
523 Type *LoadSizeType =
524 IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);
525 Type *BSwapSizeType =
526 DL.isLittleEndian()
528 PowerOf2Ceil(CurLoadEntry.LoadSize * 8))
529 : nullptr;
530 Type *MaxLoadType = IntegerType::get(
531 CI->getContext(),
532 std::max(MaxLoadSize, (unsigned)PowerOf2Ceil(CurLoadEntry.LoadSize)) * 8);
533 assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type");
534
535 Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
536
537 const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType,
538 CurLoadEntry.Offset);
539
540 // Add the loaded values to the phi nodes for calculating memcmp result only
541 // if result is not used in a zero equality.
542 if (!IsUsedForZeroCmp) {
543 ResBlock.PhiSrc1->addIncoming(Loads.Lhs, LoadCmpBlocks[BlockIndex]);
544 ResBlock.PhiSrc2->addIncoming(Loads.Rhs, LoadCmpBlocks[BlockIndex]);
545 }
546
547 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Loads.Lhs, Loads.Rhs);
548 BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
549 ? EndBlock
550 : LoadCmpBlocks[BlockIndex + 1];
551 // Early exit branch if difference found to ResultBlock. Otherwise, continue
552 // to next LoadCmpBlock or EndBlock.
553 BasicBlock *BB = Builder.GetInsertBlock();
554 BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);
555 Builder.Insert(CmpBr);
556 if (DTU)
557 DTU->applyUpdates({{DominatorTree::Insert, BB, NextBB},
558 {DominatorTree::Insert, BB, ResBlock.BB}});
559
560 // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
561 // since early exit to ResultBlock was not taken (no difference was found in
562 // any of the bytes).
563 if (BlockIndex == LoadCmpBlocks.size() - 1) {
564 Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
565 PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
566 }
567}
568
569// This function populates the ResultBlock with a sequence to calculate the
570// memcmp result. It compares the two loaded source values and returns -1 if
571// src1 < src2 and 1 if src1 > src2.
572void MemCmpExpansion::emitMemCmpResultBlock() {
573 // Special case: if memcmp result is used in a zero equality, result does not
574 // need to be calculated and can simply return 1.
575 if (IsUsedForZeroCmp) {
576 BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
577 Builder.SetInsertPoint(ResBlock.BB, InsertPt);
578 Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1);
579 PhiRes->addIncoming(Res, ResBlock.BB);
580 BranchInst *NewBr = BranchInst::Create(EndBlock);
581 Builder.Insert(NewBr);
582 if (DTU)
583 DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
584 return;
585 }
586 BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
587 Builder.SetInsertPoint(ResBlock.BB, InsertPt);
588
589 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1,
590 ResBlock.PhiSrc2);
591
592 Value *Res =
593 Builder.CreateSelect(Cmp, Constant::getAllOnesValue(Builder.getInt32Ty()),
594 ConstantInt::get(Builder.getInt32Ty(), 1));
595
596 PhiRes->addIncoming(Res, ResBlock.BB);
597 BranchInst *NewBr = BranchInst::Create(EndBlock);
598 Builder.Insert(NewBr);
599 if (DTU)
600 DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
601}
602
603void MemCmpExpansion::setupResultBlockPHINodes() {
604 Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
605 Builder.SetInsertPoint(ResBlock.BB);
606 // Note: this assumes one load per block.
607 ResBlock.PhiSrc1 =
608 Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src1");
609 ResBlock.PhiSrc2 =
610 Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src2");
611}
612
613void MemCmpExpansion::setupEndBlockPHINodes() {
614 Builder.SetInsertPoint(EndBlock, EndBlock->begin());
615 PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res");
616}
617
618Value *MemCmpExpansion::getMemCmpExpansionZeroCase() {
619 unsigned LoadIndex = 0;
620 // This loop populates each of the LoadCmpBlocks with the IR sequence to
621 // handle multiple loads per block.
622 for (unsigned I = 0; I < getNumBlocks(); ++I) {
623 emitLoadCompareBlockMultipleLoads(I, LoadIndex);
624 }
625
626 emitMemCmpResultBlock();
627 return PhiRes;
628}
629
630/// A memcmp expansion that compares equality with 0 and only has one block of
631/// load and compare can bypass the compare, branch, and phi IR that is required
632/// in the general case.
633Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {
634 unsigned LoadIndex = 0;
635 Value *Cmp = getCompareLoadPairs(0, LoadIndex);
636 assert(LoadIndex == getNumLoads() && "some entries were not consumed");
637 return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext()));
638}
639
640/// A memcmp expansion that only has one block of load and compare can bypass
641/// the compare, branch, and phi IR that is required in the general case.
642/// This function also analyses users of memcmp, and if there is only one user
643/// from which we can conclude that only 2 out of 3 memcmp outcomes really
644/// matter, then it generates more efficient code with only one comparison.
645Value *MemCmpExpansion::getMemCmpOneBlock() {
646 bool NeedsBSwap = DL.isLittleEndian() && Size != 1;
647 Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
648 Type *BSwapSizeType =
649 NeedsBSwap ? IntegerType::get(CI->getContext(), PowerOf2Ceil(Size * 8))
650 : nullptr;
651 Type *MaxLoadType =
653 std::max(MaxLoadSize, (unsigned)PowerOf2Ceil(Size)) * 8);
654
655 // The i8 and i16 cases don't need compares. We zext the loaded values and
656 // subtract them to get the suitable negative, zero, or positive i32 result.
657 if (Size == 1 || Size == 2) {
658 const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType,
659 Builder.getInt32Ty(), /*Offset*/ 0);
660 return Builder.CreateSub(Loads.Lhs, Loads.Rhs);
661 }
662
663 const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType,
664 /*Offset*/ 0);
665
666 // If a user of memcmp cares only about two outcomes, for example:
667 // bool result = memcmp(a, b, NBYTES) > 0;
668 // We can generate more optimal code with a smaller number of operations
669 if (CI->hasOneUser()) {
670 auto *UI = cast<Instruction>(*CI->user_begin());
671 CmpPredicate Pred = ICmpInst::Predicate::BAD_ICMP_PREDICATE;
672 bool NeedsZExt = false;
673 // This is a special case because instead of checking if the result is less
674 // than zero:
675 // bool result = memcmp(a, b, NBYTES) < 0;
676 // Compiler is clever enough to generate the following code:
677 // bool result = memcmp(a, b, NBYTES) >> 31;
678 if (match(UI,
679 m_LShr(m_Value(),
680 m_SpecificInt(CI->getType()->getIntegerBitWidth() - 1)))) {
681 Pred = ICmpInst::ICMP_SLT;
682 NeedsZExt = true;
683 } else if (match(UI, m_SpecificICmp(ICmpInst::ICMP_SGT, m_Specific(CI),
684 m_AllOnes()))) {
685 // Adjust predicate as if it compared with 0.
686 Pred = ICmpInst::ICMP_SGE;
687 } else if (match(UI, m_SpecificICmp(ICmpInst::ICMP_SLT, m_Specific(CI),
688 m_One()))) {
689 // Adjust predicate as if it compared with 0.
690 Pred = ICmpInst::ICMP_SLE;
691 } else {
692 // In case of a successful match this call will set `Pred` variable
693 match(UI, m_ICmp(Pred, m_Specific(CI), m_Zero()));
694 }
695 // Generate new code and remove the original memcmp call and the user
696 if (ICmpInst::isSigned(Pred)) {
698 Loads.Lhs, Loads.Rhs);
699 auto *Result = NeedsZExt ? Builder.CreateZExt(Cmp, UI->getType()) : Cmp;
700 UI->replaceAllUsesWith(Result);
701 UI->eraseFromParent();
702 CI->eraseFromParent();
703 return nullptr;
704 }
705 }
706
707 // The result of memcmp is negative, zero, or positive.
708 return Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::ucmp,
709 {Loads.Lhs, Loads.Rhs});
710}
711
712// This function expands the memcmp call into an inline expansion and returns
713// the memcmp result. Returns nullptr if the memcmp is already replaced.
714Value *MemCmpExpansion::getMemCmpExpansion() {
715 // Create the basic block framework for a multi-block expansion.
716 if (getNumBlocks() != 1) {
717 BasicBlock *StartBlock = CI->getParent();
718 EndBlock = SplitBlock(StartBlock, CI, DTU, /*LI=*/nullptr,
719 /*MSSAU=*/nullptr, "endblock");
720 setupEndBlockPHINodes();
721 createResultBlock();
722
723 // If return value of memcmp is not used in a zero equality, we need to
724 // calculate which source was larger. The calculation requires the
725 // two loaded source values of each load compare block.
726 // These will be saved in the phi nodes created by setupResultBlockPHINodes.
727 if (!IsUsedForZeroCmp) setupResultBlockPHINodes();
728
729 // Create the number of required load compare basic blocks.
730 createLoadCmpBlocks();
731
732 // Update the terminator added by SplitBlock to branch to the first
733 // LoadCmpBlock.
734 StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]);
735 if (DTU)
736 DTU->applyUpdates({{DominatorTree::Insert, StartBlock, LoadCmpBlocks[0]},
737 {DominatorTree::Delete, StartBlock, EndBlock}});
738 }
739
741
742 if (IsUsedForZeroCmp)
743 return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock()
744 : getMemCmpExpansionZeroCase();
745
746 if (getNumBlocks() == 1)
747 return getMemCmpOneBlock();
748
749 for (unsigned I = 0; I < getNumBlocks(); ++I) {
750 emitLoadCompareBlock(I);
751 }
752
753 emitMemCmpResultBlock();
754 return PhiRes;
755}
756
757// This function checks to see if an expansion of memcmp can be generated.
758// It checks for constant compare size that is less than the max inline size.
759// If an expansion cannot occur, returns false to leave as a library call.
760// Otherwise, the library call is replaced with a new IR instruction sequence.
761/// We want to transform:
762/// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15)
763/// To:
764/// loadbb:
765/// %0 = bitcast i32* %buffer2 to i8*
766/// %1 = bitcast i32* %buffer1 to i8*
767/// %2 = bitcast i8* %1 to i64*
768/// %3 = bitcast i8* %0 to i64*
769/// %4 = load i64, i64* %2
770/// %5 = load i64, i64* %3
771/// %6 = call i64 @llvm.bswap.i64(i64 %4)
772/// %7 = call i64 @llvm.bswap.i64(i64 %5)
773/// %8 = sub i64 %6, %7
774/// %9 = icmp ne i64 %8, 0
775/// br i1 %9, label %res_block, label %loadbb1
776/// res_block: ; preds = %loadbb2,
777/// %loadbb1, %loadbb
778/// %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ]
779/// %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ]
780/// %10 = icmp ult i64 %phi.src1, %phi.src2
781/// %11 = select i1 %10, i32 -1, i32 1
782/// br label %endblock
783/// loadbb1: ; preds = %loadbb
784/// %12 = bitcast i32* %buffer2 to i8*
785/// %13 = bitcast i32* %buffer1 to i8*
786/// %14 = bitcast i8* %13 to i32*
787/// %15 = bitcast i8* %12 to i32*
788/// %16 = getelementptr i32, i32* %14, i32 2
789/// %17 = getelementptr i32, i32* %15, i32 2
790/// %18 = load i32, i32* %16
791/// %19 = load i32, i32* %17
792/// %20 = call i32 @llvm.bswap.i32(i32 %18)
793/// %21 = call i32 @llvm.bswap.i32(i32 %19)
794/// %22 = zext i32 %20 to i64
795/// %23 = zext i32 %21 to i64
796/// %24 = sub i64 %22, %23
797/// %25 = icmp ne i64 %24, 0
798/// br i1 %25, label %res_block, label %loadbb2
799/// loadbb2: ; preds = %loadbb1
800/// %26 = bitcast i32* %buffer2 to i8*
801/// %27 = bitcast i32* %buffer1 to i8*
802/// %28 = bitcast i8* %27 to i16*
803/// %29 = bitcast i8* %26 to i16*
804/// %30 = getelementptr i16, i16* %28, i16 6
805/// %31 = getelementptr i16, i16* %29, i16 6
806/// %32 = load i16, i16* %30
807/// %33 = load i16, i16* %31
808/// %34 = call i16 @llvm.bswap.i16(i16 %32)
809/// %35 = call i16 @llvm.bswap.i16(i16 %33)
810/// %36 = zext i16 %34 to i64
811/// %37 = zext i16 %35 to i64
812/// %38 = sub i64 %36, %37
813/// %39 = icmp ne i64 %38, 0
814/// br i1 %39, label %res_block, label %loadbb3
815/// loadbb3: ; preds = %loadbb2
816/// %40 = bitcast i32* %buffer2 to i8*
817/// %41 = bitcast i32* %buffer1 to i8*
818/// %42 = getelementptr i8, i8* %41, i8 14
819/// %43 = getelementptr i8, i8* %40, i8 14
820/// %44 = load i8, i8* %42
821/// %45 = load i8, i8* %43
822/// %46 = zext i8 %44 to i32
823/// %47 = zext i8 %45 to i32
824/// %48 = sub i32 %46, %47
825/// br label %endblock
826/// endblock: ; preds = %res_block,
827/// %loadbb3
828/// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ]
829/// ret i32 %phi.res
830static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
831 const TargetLowering *TLI, const DataLayout *DL,
832 ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
833 DomTreeUpdater *DTU, const bool IsBCmp) {
834 NumMemCmpCalls++;
835
836 // Early exit from expansion if -Oz.
837 if (CI->getFunction()->hasMinSize())
838 return false;
839
840 // Early exit from expansion if size is not a constant.
841 ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2));
842 if (!SizeCast) {
843 NumMemCmpNotConstant++;
844 return false;
845 }
846 const uint64_t SizeVal = SizeCast->getZExtValue();
847
848 if (SizeVal == 0) {
849 return false;
850 }
851 // TTI call to check if target would like to expand memcmp. Also, get the
852 // available load sizes.
853 const bool IsUsedForZeroCmp =
855 bool OptForSize = llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
856 auto Options = TTI->enableMemCmpExpansion(OptForSize,
857 IsUsedForZeroCmp);
858 if (!Options) return false;
859
860 if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences())
862
863 if (OptForSize &&
864 MaxLoadsPerMemcmpOptSize.getNumOccurrences())
865 Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize;
866
867 if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences())
868 Options.MaxNumLoads = MaxLoadsPerMemcmp;
869
870 MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL, DTU);
871
872 // Don't expand if this will require more loads than desired by the target.
873 if (Expansion.getNumLoads() == 0) {
874 NumMemCmpGreaterThanMax++;
875 return false;
876 }
877
878 NumMemCmpInlined++;
879
880 if (Value *Res = Expansion.getMemCmpExpansion()) {
881 // Replace call with result of expansion and erase call.
882 CI->replaceAllUsesWith(Res);
883 CI->eraseFromParent();
884 }
885
886 return true;
887}
888
889// Returns true if a change was made.
890static bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
891 const TargetTransformInfo *TTI, const TargetLowering *TL,
892 const DataLayout &DL, ProfileSummaryInfo *PSI,
893 BlockFrequencyInfo *BFI, DomTreeUpdater *DTU);
894
895static PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
896 const TargetTransformInfo *TTI,
897 const TargetLowering *TL,
898 ProfileSummaryInfo *PSI,
899 BlockFrequencyInfo *BFI, DominatorTree *DT);
900
901class ExpandMemCmpLegacyPass : public FunctionPass {
902public:
903 static char ID;
904
905 ExpandMemCmpLegacyPass() : FunctionPass(ID) {
907 }
908
909 bool runOnFunction(Function &F) override {
910 if (skipFunction(F)) return false;
911
912 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
913 if (!TPC) {
914 return false;
915 }
916 const TargetLowering* TL =
917 TPC->getTM<TargetMachine>().getSubtargetImpl(F)->getTargetLowering();
918
919 const TargetLibraryInfo *TLI =
920 &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
921 const TargetTransformInfo *TTI =
922 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
923 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
924 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
925 &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
926 nullptr;
927 DominatorTree *DT = nullptr;
928 if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
929 DT = &DTWP->getDomTree();
930 auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI, DT);
931 return !PA.areAllPreserved();
932 }
933
934private:
935 void getAnalysisUsage(AnalysisUsage &AU) const override {
936 AU.addRequired<TargetLibraryInfoWrapperPass>();
937 AU.addRequired<TargetTransformInfoWrapperPass>();
938 AU.addRequired<ProfileSummaryInfoWrapperPass>();
939 AU.addPreserved<DominatorTreeWrapperPass>();
941 FunctionPass::getAnalysisUsage(AU);
942 }
943};
944
945bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
946 const TargetTransformInfo *TTI, const TargetLowering *TL,
947 const DataLayout &DL, ProfileSummaryInfo *PSI,
948 BlockFrequencyInfo *BFI, DomTreeUpdater *DTU) {
949 for (Instruction &I : BB) {
950 CallInst *CI = dyn_cast<CallInst>(&I);
951 if (!CI) {
952 continue;
953 }
955 if (TLI->getLibFunc(*CI, Func) &&
956 (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
957 expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU, Func == LibFunc_bcmp)) {
958 return true;
959 }
960 }
961 return false;
962}
963
964PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
965 const TargetTransformInfo *TTI,
966 const TargetLowering *TL, ProfileSummaryInfo *PSI,
967 BlockFrequencyInfo *BFI, DominatorTree *DT) {
968 std::optional<DomTreeUpdater> DTU;
969 if (DT)
970 DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
971
972 const DataLayout& DL = F.getDataLayout();
973 bool MadeChanges = false;
974 for (auto BBIt = F.begin(); BBIt != F.end();) {
975 if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI, DTU ? &*DTU : nullptr)) {
976 MadeChanges = true;
977 // If changes were made, restart the function from the beginning, since
978 // the structure of the function was changed.
979 BBIt = F.begin();
980 } else {
981 ++BBIt;
982 }
983 }
984 if (MadeChanges)
985 for (BasicBlock &BB : F)
987 if (!MadeChanges)
988 return PreservedAnalyses::all();
989 PreservedAnalyses PA;
990 PA.preserve<DominatorTreeAnalysis>();
991 return PA;
992}
993
994} // namespace
995
998 const auto *TL = TM->getSubtargetImpl(F)->getTargetLowering();
999 const auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
1000 const auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
1001 auto *PSI = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F)
1002 .getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
1003 BlockFrequencyInfo *BFI = (PSI && PSI->hasProfileSummary())
1004 ? &FAM.getResult<BlockFrequencyAnalysis>(F)
1005 : nullptr;
1006 auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
1007
1008 return runImpl(F, &TLI, &TTI, TL, PSI, BFI, DT);
1009}
1010
1011char ExpandMemCmpLegacyPass::ID = 0;
1012INITIALIZE_PASS_BEGIN(ExpandMemCmpLegacyPass, DEBUG_TYPE,
1013 "Expand memcmp() to load/stores", false, false)
1019INITIALIZE_PASS_END(ExpandMemCmpLegacyPass, DEBUG_TYPE,
1020 "Expand memcmp() to load/stores", false, false)
1021
1023 return new ExpandMemCmpLegacyPass();
1024}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
DXIL Intrinsic Expansion
static bool runOnFunction(Function &F, bool PostInlining)
static bool runImpl(Function &F, const TargetLowering &TLI, AssumptionCache *AC)
Definition ExpandFp.cpp:993
static cl::opt< unsigned > MaxLoadsPerMemcmpOptSize("max-loads-per-memcmp-opt-size", cl::Hidden, cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"))
static cl::opt< unsigned > MaxLoadsPerMemcmp("max-loads-per-memcmp", cl::Hidden, cl::desc("Set maximum number of loads used in expanded memcmp"))
static cl::opt< unsigned > MemCmpEqZeroNumLoadsPerBlock("memcmp-num-loads-per-block", cl::Hidden, cl::init(1), cl::desc("The number of loads per basic block for inline expansion of " "memcmp that is only being compared against zero."))
#define DEBUG_TYPE
static LVOptions Options
Definition LVOptions.cpp:25
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition ArrayRef.h:200
const T & front() const
front - Get the first element.
Definition ArrayRef.h:150
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:142
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
Value * getArgOperand(unsigned i) const
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
Analysis pass which computes a DominatorTree.
Definition Dominators.h:284
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:322
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
Value * CreateConstGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0, const Twine &Name="")
Definition IRBuilder.h:1986
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition IRBuilder.h:1867
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition IRBuilder.h:562
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:201
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2336
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition IRBuilder.h:2497
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Definition IRBuilder.h:172
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1420
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2085
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2511
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:1599
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2442
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1573
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:319
This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.
static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)
Helper for client passes to set up the analysis usage on behalf of this pass.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
bool hasProfileSummary() const
Returns true if profile summary is available.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Wrapper pass for TargetTransformInfo.
LLVM_ABI MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
LLVM_ABI unsigned getIntegerBitWidth() const
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
user_iterator user_begin()
Definition Value.h:402
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
Definition Value.cpp:166
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:956
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1099
const ParentTy * getParent() const
Definition ilist_node.h:34
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
initializer< Ty > init(const Ty &Val)
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
LLVM_ABI bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
LLVM_ABI bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition Local.cpp:721
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
LLVM_ABI void initializeExpandMemCmpLegacyPassPass(PassRegistry &)
LLVM_ABI FunctionPass * createExpandMemCmpLegacyPass()
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
@ Or
Bitwise or logical OR of integers.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI Constant * ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, APInt Offset, const DataLayout &DL)
Return the value that a load from C with offset Offset would produce if it is constant and determinab...
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.