LLVM 17.0.0git
ExpandMemCmp.cpp
Go to the documentation of this file.
1//===--- ExpandMemCmp.cpp - Expand memcmp() to load/stores ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass tries to expand memcmp() calls into optimally-sized loads and
10// compares for the target.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/Statistic.h"
24#include "llvm/IR/Dominators.h"
25#include "llvm/IR/IRBuilder.h"
31#include <optional>
32
33using namespace llvm;
34
35namespace llvm {
36class TargetLowering;
37}
38
39#define DEBUG_TYPE "expandmemcmp"
40
41STATISTIC(NumMemCmpCalls, "Number of memcmp calls");
42STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size");
43STATISTIC(NumMemCmpGreaterThanMax,
44 "Number of memcmp calls with size greater than max size");
45STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls");
46
48 "memcmp-num-loads-per-block", cl::Hidden, cl::init(1),
49 cl::desc("The number of loads per basic block for inline expansion of "
50 "memcmp that is only being compared against zero."));
51
53 "max-loads-per-memcmp", cl::Hidden,
54 cl::desc("Set maximum number of loads used in expanded memcmp"));
55
57 "max-loads-per-memcmp-opt-size", cl::Hidden,
58 cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"));
59
60namespace {
61
62
63// This class provides helper functions to expand a memcmp library call into an
64// inline expansion.
65class MemCmpExpansion {
66 struct ResultBlock {
67 BasicBlock *BB = nullptr;
68 PHINode *PhiSrc1 = nullptr;
69 PHINode *PhiSrc2 = nullptr;
70
71 ResultBlock() = default;
72 };
73
74 CallInst *const CI = nullptr;
75 ResultBlock ResBlock;
76 const uint64_t Size;
77 unsigned MaxLoadSize = 0;
78 uint64_t NumLoadsNonOneByte = 0;
79 const uint64_t NumLoadsPerBlockForZeroCmp;
80 std::vector<BasicBlock *> LoadCmpBlocks;
81 BasicBlock *EndBlock = nullptr;
82 PHINode *PhiRes = nullptr;
83 const bool IsUsedForZeroCmp;
84 const DataLayout &DL;
85 DomTreeUpdater *DTU = nullptr;
87 // Represents the decomposition in blocks of the expansion. For example,
88 // comparing 33 bytes on X86+sse can be done with 2x16-byte loads and
89 // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {1, 32}.
90 struct LoadEntry {
91 LoadEntry(unsigned LoadSize, uint64_t Offset)
92 : LoadSize(LoadSize), Offset(Offset) {
93 }
94
95 // The size of the load for this block, in bytes.
96 unsigned LoadSize;
97 // The offset of this load from the base pointer, in bytes.
99 };
100 using LoadEntryVector = SmallVector<LoadEntry, 8>;
101 LoadEntryVector LoadSequence;
102
103 void createLoadCmpBlocks();
104 void createResultBlock();
105 void setupResultBlockPHINodes();
106 void setupEndBlockPHINodes();
107 Value *getCompareLoadPairs(unsigned BlockIndex, unsigned &LoadIndex);
108 void emitLoadCompareBlock(unsigned BlockIndex);
109 void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
110 unsigned &LoadIndex);
111 void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes);
112 void emitMemCmpResultBlock();
113 Value *getMemCmpExpansionZeroCase();
114 Value *getMemCmpEqZeroOneBlock();
115 Value *getMemCmpOneBlock();
116 struct LoadPair {
117 Value *Lhs = nullptr;
118 Value *Rhs = nullptr;
119 };
120 LoadPair getLoadPair(Type *LoadSizeType, bool NeedsBSwap, Type *CmpSizeType,
121 unsigned OffsetBytes);
122
123 static LoadEntryVector
124 computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
125 unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte);
126 static LoadEntryVector
127 computeOverlappingLoadSequence(uint64_t Size, unsigned MaxLoadSize,
128 unsigned MaxNumLoads,
129 unsigned &NumLoadsNonOneByte);
130
131public:
132 MemCmpExpansion(CallInst *CI, uint64_t Size,
134 const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,
135 DomTreeUpdater *DTU);
136
137 unsigned getNumBlocks();
138 uint64_t getNumLoads() const { return LoadSequence.size(); }
139
140 Value *getMemCmpExpansion();
141};
142
143MemCmpExpansion::LoadEntryVector MemCmpExpansion::computeGreedyLoadSequence(
145 const unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte) {
146 NumLoadsNonOneByte = 0;
147 LoadEntryVector LoadSequence;
148 uint64_t Offset = 0;
149 while (Size && !LoadSizes.empty()) {
150 const unsigned LoadSize = LoadSizes.front();
151 const uint64_t NumLoadsForThisSize = Size / LoadSize;
152 if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {
153 // Do not expand if the total number of loads is larger than what the
154 // target allows. Note that it's important that we exit before completing
155 // the expansion to avoid using a ton of memory to store the expansion for
156 // large sizes.
157 return {};
158 }
159 if (NumLoadsForThisSize > 0) {
160 for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) {
161 LoadSequence.push_back({LoadSize, Offset});
162 Offset += LoadSize;
163 }
164 if (LoadSize > 1)
165 ++NumLoadsNonOneByte;
166 Size = Size % LoadSize;
167 }
168 LoadSizes = LoadSizes.drop_front();
169 }
170 return LoadSequence;
171}
172
174MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size,
175 const unsigned MaxLoadSize,
176 const unsigned MaxNumLoads,
177 unsigned &NumLoadsNonOneByte) {
178 // These are already handled by the greedy approach.
179 if (Size < 2 || MaxLoadSize < 2)
180 return {};
181
182 // We try to do as many non-overlapping loads as possible starting from the
183 // beginning.
184 const uint64_t NumNonOverlappingLoads = Size / MaxLoadSize;
185 assert(NumNonOverlappingLoads && "there must be at least one load");
186 // There remain 0 to (MaxLoadSize - 1) bytes to load, this will be done with
187 // an overlapping load.
188 Size = Size - NumNonOverlappingLoads * MaxLoadSize;
189 // Bail if we do not need an overloapping store, this is already handled by
190 // the greedy approach.
191 if (Size == 0)
192 return {};
193 // Bail if the number of loads (non-overlapping + potential overlapping one)
194 // is larger than the max allowed.
195 if ((NumNonOverlappingLoads + 1) > MaxNumLoads)
196 return {};
197
198 // Add non-overlapping loads.
199 LoadEntryVector LoadSequence;
200 uint64_t Offset = 0;
201 for (uint64_t I = 0; I < NumNonOverlappingLoads; ++I) {
202 LoadSequence.push_back({MaxLoadSize, Offset});
203 Offset += MaxLoadSize;
204 }
205
206 // Add the last overlapping load.
207 assert(Size > 0 && Size < MaxLoadSize && "broken invariant");
208 LoadSequence.push_back({MaxLoadSize, Offset - (MaxLoadSize - Size)});
209 NumLoadsNonOneByte = 1;
210 return LoadSequence;
211}
212
213// Initialize the basic block structure required for expansion of memcmp call
214// with given maximum load size and memcmp size parameter.
215// This structure includes:
216// 1. A list of load compare blocks - LoadCmpBlocks.
217// 2. An EndBlock, split from original instruction point, which is the block to
218// return from.
219// 3. ResultBlock, block to branch to for early exit when a
220// LoadCmpBlock finds a difference.
221MemCmpExpansion::MemCmpExpansion(
222 CallInst *const CI, uint64_t Size,
224 const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,
225 DomTreeUpdater *DTU)
226 : CI(CI), Size(Size), NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock),
227 IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), DTU(DTU),
228 Builder(CI) {
229 assert(Size > 0 && "zero blocks");
230 // Scale the max size down if the target can load more bytes than we need.
231 llvm::ArrayRef<unsigned> LoadSizes(Options.LoadSizes);
232 while (!LoadSizes.empty() && LoadSizes.front() > Size) {
233 LoadSizes = LoadSizes.drop_front();
234 }
235 assert(!LoadSizes.empty() && "cannot load Size bytes");
236 MaxLoadSize = LoadSizes.front();
237 // Compute the decomposition.
238 unsigned GreedyNumLoadsNonOneByte = 0;
239 LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, Options.MaxNumLoads,
240 GreedyNumLoadsNonOneByte);
241 NumLoadsNonOneByte = GreedyNumLoadsNonOneByte;
242 assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
243 // If we allow overlapping loads and the load sequence is not already optimal,
244 // use overlapping loads.
245 if (Options.AllowOverlappingLoads &&
246 (LoadSequence.empty() || LoadSequence.size() > 2)) {
247 unsigned OverlappingNumLoadsNonOneByte = 0;
248 auto OverlappingLoads = computeOverlappingLoadSequence(
249 Size, MaxLoadSize, Options.MaxNumLoads, OverlappingNumLoadsNonOneByte);
250 if (!OverlappingLoads.empty() &&
251 (LoadSequence.empty() ||
252 OverlappingLoads.size() < LoadSequence.size())) {
253 LoadSequence = OverlappingLoads;
254 NumLoadsNonOneByte = OverlappingNumLoadsNonOneByte;
255 }
256 }
257 assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
258}
259
260unsigned MemCmpExpansion::getNumBlocks() {
261 if (IsUsedForZeroCmp)
262 return getNumLoads() / NumLoadsPerBlockForZeroCmp +
263 (getNumLoads() % NumLoadsPerBlockForZeroCmp != 0 ? 1 : 0);
264 return getNumLoads();
265}
266
267void MemCmpExpansion::createLoadCmpBlocks() {
268 for (unsigned i = 0; i < getNumBlocks(); i++) {
269 BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb",
270 EndBlock->getParent(), EndBlock);
271 LoadCmpBlocks.push_back(BB);
272 }
273}
274
275void MemCmpExpansion::createResultBlock() {
276 ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block",
277 EndBlock->getParent(), EndBlock);
278}
279
280MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
281 bool NeedsBSwap,
282 Type *CmpSizeType,
283 unsigned OffsetBytes) {
284 // Get the memory source at offset `OffsetBytes`.
285 Value *LhsSource = CI->getArgOperand(0);
286 Value *RhsSource = CI->getArgOperand(1);
287 Align LhsAlign = LhsSource->getPointerAlignment(DL);
288 Align RhsAlign = RhsSource->getPointerAlignment(DL);
289 if (OffsetBytes > 0) {
290 auto *ByteType = Type::getInt8Ty(CI->getContext());
291 LhsSource = Builder.CreateConstGEP1_64(
292 ByteType, Builder.CreateBitCast(LhsSource, ByteType->getPointerTo()),
293 OffsetBytes);
294 RhsSource = Builder.CreateConstGEP1_64(
295 ByteType, Builder.CreateBitCast(RhsSource, ByteType->getPointerTo()),
296 OffsetBytes);
297 LhsAlign = commonAlignment(LhsAlign, OffsetBytes);
298 RhsAlign = commonAlignment(RhsAlign, OffsetBytes);
299 }
300 LhsSource = Builder.CreateBitCast(LhsSource, LoadSizeType->getPointerTo());
301 RhsSource = Builder.CreateBitCast(RhsSource, LoadSizeType->getPointerTo());
302
303 // Create a constant or a load from the source.
304 Value *Lhs = nullptr;
305 if (auto *C = dyn_cast<Constant>(LhsSource))
306 Lhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
307 if (!Lhs)
308 Lhs = Builder.CreateAlignedLoad(LoadSizeType, LhsSource, LhsAlign);
309
310 Value *Rhs = nullptr;
311 if (auto *C = dyn_cast<Constant>(RhsSource))
312 Rhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
313 if (!Rhs)
314 Rhs = Builder.CreateAlignedLoad(LoadSizeType, RhsSource, RhsAlign);
315
316 // Swap bytes if required.
317 if (NeedsBSwap) {
319 Intrinsic::bswap, LoadSizeType);
320 Lhs = Builder.CreateCall(Bswap, Lhs);
321 Rhs = Builder.CreateCall(Bswap, Rhs);
322 }
323
324 // Zero extend if required.
325 if (CmpSizeType != nullptr && CmpSizeType != LoadSizeType) {
326 Lhs = Builder.CreateZExt(Lhs, CmpSizeType);
327 Rhs = Builder.CreateZExt(Rhs, CmpSizeType);
328 }
329 return {Lhs, Rhs};
330}
331
332// This function creates the IR instructions for loading and comparing 1 byte.
333// It loads 1 byte from each source of the memcmp parameters with the given
334// GEPIndex. It then subtracts the two loaded values and adds this result to the
335// final phi node for selecting the memcmp result.
336void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
337 unsigned OffsetBytes) {
338 BasicBlock *BB = LoadCmpBlocks[BlockIndex];
339 Builder.SetInsertPoint(BB);
340 const LoadPair Loads =
341 getLoadPair(Type::getInt8Ty(CI->getContext()), /*NeedsBSwap=*/false,
342 Type::getInt32Ty(CI->getContext()), OffsetBytes);
343 Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs);
344
345 PhiRes->addIncoming(Diff, BB);
346
347 if (BlockIndex < (LoadCmpBlocks.size() - 1)) {
348 // Early exit branch if difference found to EndBlock. Otherwise, continue to
349 // next LoadCmpBlock,
350 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,
351 ConstantInt::get(Diff->getType(), 0));
352 BranchInst *CmpBr =
353 BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp);
354 Builder.Insert(CmpBr);
355 if (DTU)
356 DTU->applyUpdates(
357 {{DominatorTree::Insert, BB, EndBlock},
358 {DominatorTree::Insert, BB, LoadCmpBlocks[BlockIndex + 1]}});
359 } else {
360 // The last block has an unconditional branch to EndBlock.
361 BranchInst *CmpBr = BranchInst::Create(EndBlock);
362 Builder.Insert(CmpBr);
363 if (DTU)
364 DTU->applyUpdates({{DominatorTree::Insert, BB, EndBlock}});
365 }
366}
367
368/// Generate an equality comparison for one or more pairs of loaded values.
369/// This is used in the case where the memcmp() call is compared equal or not
370/// equal to zero.
371Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
372 unsigned &LoadIndex) {
373 assert(LoadIndex < getNumLoads() &&
374 "getCompareLoadPairs() called with no remaining loads");
375 std::vector<Value *> XorList, OrList;
376 Value *Diff = nullptr;
377
378 const unsigned NumLoads =
379 std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp);
380
381 // For a single-block expansion, start inserting before the memcmp call.
382 if (LoadCmpBlocks.empty())
383 Builder.SetInsertPoint(CI);
384 else
385 Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
386
387 Value *Cmp = nullptr;
388 // If we have multiple loads per block, we need to generate a composite
389 // comparison using xor+or. The type for the combinations is the largest load
390 // type.
391 IntegerType *const MaxLoadType =
392 NumLoads == 1 ? nullptr
393 : IntegerType::get(CI->getContext(), MaxLoadSize * 8);
394 for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) {
395 const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex];
396 const LoadPair Loads = getLoadPair(
397 IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8),
398 /*NeedsBSwap=*/false, MaxLoadType, CurLoadEntry.Offset);
399
400 if (NumLoads != 1) {
401 // If we have multiple loads per block, we need to generate a composite
402 // comparison using xor+or.
403 Diff = Builder.CreateXor(Loads.Lhs, Loads.Rhs);
404 Diff = Builder.CreateZExt(Diff, MaxLoadType);
405 XorList.push_back(Diff);
406 } else {
407 // If there's only one load per block, we just compare the loaded values.
408 Cmp = Builder.CreateICmpNE(Loads.Lhs, Loads.Rhs);
409 }
410 }
411
412 auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> {
413 std::vector<Value *> OutList;
414 for (unsigned i = 0; i < InList.size() - 1; i = i + 2) {
415 Value *Or = Builder.CreateOr(InList[i], InList[i + 1]);
416 OutList.push_back(Or);
417 }
418 if (InList.size() % 2 != 0)
419 OutList.push_back(InList.back());
420 return OutList;
421 };
422
423 if (!Cmp) {
424 // Pairwise OR the XOR results.
425 OrList = pairWiseOr(XorList);
426
427 // Pairwise OR the OR results until one result left.
428 while (OrList.size() != 1) {
429 OrList = pairWiseOr(OrList);
430 }
431
432 assert(Diff && "Failed to find comparison diff");
433 Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0));
434 }
435
436 return Cmp;
437}
438
439void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
440 unsigned &LoadIndex) {
441 Value *Cmp = getCompareLoadPairs(BlockIndex, LoadIndex);
442
443 BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
444 ? EndBlock
445 : LoadCmpBlocks[BlockIndex + 1];
446 // Early exit branch if difference found to ResultBlock. Otherwise,
447 // continue to next LoadCmpBlock or EndBlock.
448 BasicBlock *BB = Builder.GetInsertBlock();
449 BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
450 Builder.Insert(CmpBr);
451 if (DTU)
452 DTU->applyUpdates({{DominatorTree::Insert, BB, ResBlock.BB},
453 {DominatorTree::Insert, BB, NextBB}});
454
455 // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
456 // since early exit to ResultBlock was not taken (no difference was found in
457 // any of the bytes).
458 if (BlockIndex == LoadCmpBlocks.size() - 1) {
460 PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
461 }
462}
463
464// This function creates the IR intructions for loading and comparing using the
465// given LoadSize. It loads the number of bytes specified by LoadSize from each
466// source of the memcmp parameters. It then does a subtract to see if there was
467// a difference in the loaded values. If a difference is found, it branches
468// with an early exit to the ResultBlock for calculating which source was
469// larger. Otherwise, it falls through to the either the next LoadCmpBlock or
470// the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with
471// a special case through emitLoadCompareByteBlock. The special handling can
472// simply subtract the loaded values and add it to the result phi node.
473void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
474 // There is one load per block in this case, BlockIndex == LoadIndex.
475 const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex];
476
477 if (CurLoadEntry.LoadSize == 1) {
478 MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, CurLoadEntry.Offset);
479 return;
480 }
481
482 Type *LoadSizeType =
483 IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);
484 Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
485 assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type");
486
487 Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
488
489 const LoadPair Loads =
490 getLoadPair(LoadSizeType, /*NeedsBSwap=*/DL.isLittleEndian(), MaxLoadType,
491 CurLoadEntry.Offset);
492
493 // Add the loaded values to the phi nodes for calculating memcmp result only
494 // if result is not used in a zero equality.
495 if (!IsUsedForZeroCmp) {
496 ResBlock.PhiSrc1->addIncoming(Loads.Lhs, LoadCmpBlocks[BlockIndex]);
497 ResBlock.PhiSrc2->addIncoming(Loads.Rhs, LoadCmpBlocks[BlockIndex]);
498 }
499
500 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Loads.Lhs, Loads.Rhs);
501 BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
502 ? EndBlock
503 : LoadCmpBlocks[BlockIndex + 1];
504 // Early exit branch if difference found to ResultBlock. Otherwise, continue
505 // to next LoadCmpBlock or EndBlock.
506 BasicBlock *BB = Builder.GetInsertBlock();
507 BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);
508 Builder.Insert(CmpBr);
509 if (DTU)
510 DTU->applyUpdates({{DominatorTree::Insert, BB, NextBB},
511 {DominatorTree::Insert, BB, ResBlock.BB}});
512
513 // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
514 // since early exit to ResultBlock was not taken (no difference was found in
515 // any of the bytes).
516 if (BlockIndex == LoadCmpBlocks.size() - 1) {
518 PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
519 }
520}
521
522// This function populates the ResultBlock with a sequence to calculate the
523// memcmp result. It compares the two loaded source values and returns -1 if
524// src1 < src2 and 1 if src1 > src2.
525void MemCmpExpansion::emitMemCmpResultBlock() {
526 // Special case: if memcmp result is used in a zero equality, result does not
527 // need to be calculated and can simply return 1.
528 if (IsUsedForZeroCmp) {
529 BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
530 Builder.SetInsertPoint(ResBlock.BB, InsertPt);
532 PhiRes->addIncoming(Res, ResBlock.BB);
533 BranchInst *NewBr = BranchInst::Create(EndBlock);
534 Builder.Insert(NewBr);
535 if (DTU)
536 DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
537 return;
538 }
539 BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
540 Builder.SetInsertPoint(ResBlock.BB, InsertPt);
541
542 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1,
543 ResBlock.PhiSrc2);
544
545 Value *Res =
546 Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1),
547 ConstantInt::get(Builder.getInt32Ty(), 1));
548
549 PhiRes->addIncoming(Res, ResBlock.BB);
550 BranchInst *NewBr = BranchInst::Create(EndBlock);
551 Builder.Insert(NewBr);
552 if (DTU)
553 DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
554}
555
556void MemCmpExpansion::setupResultBlockPHINodes() {
557 Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
558 Builder.SetInsertPoint(ResBlock.BB);
559 // Note: this assumes one load per block.
560 ResBlock.PhiSrc1 =
561 Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src1");
562 ResBlock.PhiSrc2 =
563 Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src2");
564}
565
566void MemCmpExpansion::setupEndBlockPHINodes() {
567 Builder.SetInsertPoint(&EndBlock->front());
568 PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res");
569}
570
571Value *MemCmpExpansion::getMemCmpExpansionZeroCase() {
572 unsigned LoadIndex = 0;
573 // This loop populates each of the LoadCmpBlocks with the IR sequence to
574 // handle multiple loads per block.
575 for (unsigned I = 0; I < getNumBlocks(); ++I) {
576 emitLoadCompareBlockMultipleLoads(I, LoadIndex);
577 }
578
579 emitMemCmpResultBlock();
580 return PhiRes;
581}
582
583/// A memcmp expansion that compares equality with 0 and only has one block of
584/// load and compare can bypass the compare, branch, and phi IR that is required
585/// in the general case.
586Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {
587 unsigned LoadIndex = 0;
588 Value *Cmp = getCompareLoadPairs(0, LoadIndex);
589 assert(LoadIndex == getNumLoads() && "some entries were not consumed");
590 return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext()));
591}
592
593/// A memcmp expansion that only has one block of load and compare can bypass
594/// the compare, branch, and phi IR that is required in the general case.
595Value *MemCmpExpansion::getMemCmpOneBlock() {
596 Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
597 bool NeedsBSwap = DL.isLittleEndian() && Size != 1;
598
599 // The i8 and i16 cases don't need compares. We zext the loaded values and
600 // subtract them to get the suitable negative, zero, or positive i32 result.
601 if (Size < 4) {
602 const LoadPair Loads =
603 getLoadPair(LoadSizeType, NeedsBSwap, Builder.getInt32Ty(),
604 /*Offset*/ 0);
605 return Builder.CreateSub(Loads.Lhs, Loads.Rhs);
606 }
607
608 const LoadPair Loads = getLoadPair(LoadSizeType, NeedsBSwap, LoadSizeType,
609 /*Offset*/ 0);
610 // The result of memcmp is negative, zero, or positive, so produce that by
611 // subtracting 2 extended compare bits: sub (ugt, ult).
612 // If a target prefers to use selects to get -1/0/1, they should be able
613 // to transform this later. The inverse transform (going from selects to math)
614 // may not be possible in the DAG because the selects got converted into
615 // branches before we got there.
616 Value *CmpUGT = Builder.CreateICmpUGT(Loads.Lhs, Loads.Rhs);
617 Value *CmpULT = Builder.CreateICmpULT(Loads.Lhs, Loads.Rhs);
618 Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty());
619 Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty());
620 return Builder.CreateSub(ZextUGT, ZextULT);
621}
622
623// This function expands the memcmp call into an inline expansion and returns
624// the memcmp result.
625Value *MemCmpExpansion::getMemCmpExpansion() {
626 // Create the basic block framework for a multi-block expansion.
627 if (getNumBlocks() != 1) {
628 BasicBlock *StartBlock = CI->getParent();
629 EndBlock = SplitBlock(StartBlock, CI, DTU, /*LI=*/nullptr,
630 /*MSSAU=*/nullptr, "endblock");
631 setupEndBlockPHINodes();
632 createResultBlock();
633
634 // If return value of memcmp is not used in a zero equality, we need to
635 // calculate which source was larger. The calculation requires the
636 // two loaded source values of each load compare block.
637 // These will be saved in the phi nodes created by setupResultBlockPHINodes.
638 if (!IsUsedForZeroCmp) setupResultBlockPHINodes();
639
640 // Create the number of required load compare basic blocks.
641 createLoadCmpBlocks();
642
643 // Update the terminator added by SplitBlock to branch to the first
644 // LoadCmpBlock.
645 StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]);
646 if (DTU)
647 DTU->applyUpdates({{DominatorTree::Insert, StartBlock, LoadCmpBlocks[0]},
648 {DominatorTree::Delete, StartBlock, EndBlock}});
649 }
650
651 Builder.SetCurrentDebugLocation(CI->getDebugLoc());
652
653 if (IsUsedForZeroCmp)
654 return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock()
655 : getMemCmpExpansionZeroCase();
656
657 if (getNumBlocks() == 1)
658 return getMemCmpOneBlock();
659
660 for (unsigned I = 0; I < getNumBlocks(); ++I) {
661 emitLoadCompareBlock(I);
662 }
663
664 emitMemCmpResultBlock();
665 return PhiRes;
666}
667
668// This function checks to see if an expansion of memcmp can be generated.
669// It checks for constant compare size that is less than the max inline size.
670// If an expansion cannot occur, returns false to leave as a library call.
671// Otherwise, the library call is replaced with a new IR instruction sequence.
672/// We want to transform:
673/// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15)
674/// To:
675/// loadbb:
676/// %0 = bitcast i32* %buffer2 to i8*
677/// %1 = bitcast i32* %buffer1 to i8*
678/// %2 = bitcast i8* %1 to i64*
679/// %3 = bitcast i8* %0 to i64*
680/// %4 = load i64, i64* %2
681/// %5 = load i64, i64* %3
682/// %6 = call i64 @llvm.bswap.i64(i64 %4)
683/// %7 = call i64 @llvm.bswap.i64(i64 %5)
684/// %8 = sub i64 %6, %7
685/// %9 = icmp ne i64 %8, 0
686/// br i1 %9, label %res_block, label %loadbb1
687/// res_block: ; preds = %loadbb2,
688/// %loadbb1, %loadbb
689/// %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ]
690/// %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ]
691/// %10 = icmp ult i64 %phi.src1, %phi.src2
692/// %11 = select i1 %10, i32 -1, i32 1
693/// br label %endblock
694/// loadbb1: ; preds = %loadbb
695/// %12 = bitcast i32* %buffer2 to i8*
696/// %13 = bitcast i32* %buffer1 to i8*
697/// %14 = bitcast i8* %13 to i32*
698/// %15 = bitcast i8* %12 to i32*
699/// %16 = getelementptr i32, i32* %14, i32 2
700/// %17 = getelementptr i32, i32* %15, i32 2
701/// %18 = load i32, i32* %16
702/// %19 = load i32, i32* %17
703/// %20 = call i32 @llvm.bswap.i32(i32 %18)
704/// %21 = call i32 @llvm.bswap.i32(i32 %19)
705/// %22 = zext i32 %20 to i64
706/// %23 = zext i32 %21 to i64
707/// %24 = sub i64 %22, %23
708/// %25 = icmp ne i64 %24, 0
709/// br i1 %25, label %res_block, label %loadbb2
710/// loadbb2: ; preds = %loadbb1
711/// %26 = bitcast i32* %buffer2 to i8*
712/// %27 = bitcast i32* %buffer1 to i8*
713/// %28 = bitcast i8* %27 to i16*
714/// %29 = bitcast i8* %26 to i16*
715/// %30 = getelementptr i16, i16* %28, i16 6
716/// %31 = getelementptr i16, i16* %29, i16 6
717/// %32 = load i16, i16* %30
718/// %33 = load i16, i16* %31
719/// %34 = call i16 @llvm.bswap.i16(i16 %32)
720/// %35 = call i16 @llvm.bswap.i16(i16 %33)
721/// %36 = zext i16 %34 to i64
722/// %37 = zext i16 %35 to i64
723/// %38 = sub i64 %36, %37
724/// %39 = icmp ne i64 %38, 0
725/// br i1 %39, label %res_block, label %loadbb3
726/// loadbb3: ; preds = %loadbb2
727/// %40 = bitcast i32* %buffer2 to i8*
728/// %41 = bitcast i32* %buffer1 to i8*
729/// %42 = getelementptr i8, i8* %41, i8 14
730/// %43 = getelementptr i8, i8* %40, i8 14
731/// %44 = load i8, i8* %42
732/// %45 = load i8, i8* %43
733/// %46 = zext i8 %44 to i32
734/// %47 = zext i8 %45 to i32
735/// %48 = sub i32 %46, %47
736/// br label %endblock
737/// endblock: ; preds = %res_block,
738/// %loadbb3
739/// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ]
740/// ret i32 %phi.res
741static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
742 const TargetLowering *TLI, const DataLayout *DL,
744 DomTreeUpdater *DTU, const bool IsBCmp) {
745 NumMemCmpCalls++;
746
747 // Early exit from expansion if -Oz.
748 if (CI->getFunction()->hasMinSize())
749 return false;
750
751 // Early exit from expansion if size is not a constant.
752 ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2));
753 if (!SizeCast) {
754 NumMemCmpNotConstant++;
755 return false;
756 }
757 const uint64_t SizeVal = SizeCast->getZExtValue();
758
759 if (SizeVal == 0) {
760 return false;
761 }
762 // TTI call to check if target would like to expand memcmp. Also, get the
763 // available load sizes.
764 const bool IsUsedForZeroCmp =
766 bool OptForSize = CI->getFunction()->hasOptSize() ||
767 llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
768 auto Options = TTI->enableMemCmpExpansion(OptForSize,
769 IsUsedForZeroCmp);
770 if (!Options) return false;
771
772 if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences())
773 Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock;
774
775 if (OptForSize &&
776 MaxLoadsPerMemcmpOptSize.getNumOccurrences())
777 Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize;
778
779 if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences())
780 Options.MaxNumLoads = MaxLoadsPerMemcmp;
781
782 MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL, DTU);
783
784 // Don't expand if this will require more loads than desired by the target.
785 if (Expansion.getNumLoads() == 0) {
786 NumMemCmpGreaterThanMax++;
787 return false;
788 }
789
790 NumMemCmpInlined++;
791
792 Value *Res = Expansion.getMemCmpExpansion();
793
794 // Replace call with result of expansion and erase call.
795 CI->replaceAllUsesWith(Res);
796 CI->eraseFromParent();
797
798 return true;
799}
800
801class ExpandMemCmpPass : public FunctionPass {
802public:
803 static char ID;
804
805 ExpandMemCmpPass() : FunctionPass(ID) {
807 }
808
809 bool runOnFunction(Function &F) override {
810 if (skipFunction(F)) return false;
811
812 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
813 if (!TPC) {
814 return false;
815 }
816 const TargetLowering* TL =
817 TPC->getTM<TargetMachine>().getSubtargetImpl(F)->getTargetLowering();
818
819 const TargetLibraryInfo *TLI =
820 &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
821 const TargetTransformInfo *TTI =
822 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
823 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
824 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
825 &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
826 nullptr;
827 DominatorTree *DT = nullptr;
828 if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
829 DT = &DTWP->getDomTree();
830 auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI, DT);
831 return !PA.areAllPreserved();
832 }
833
834private:
835 void getAnalysisUsage(AnalysisUsage &AU) const override {
842 }
843
846 const TargetLowering *TL, ProfileSummaryInfo *PSI,
848 // Returns true if a change was made.
849 bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
850 const TargetTransformInfo *TTI, const TargetLowering *TL,
851 const DataLayout &DL, ProfileSummaryInfo *PSI,
853};
854
855bool ExpandMemCmpPass::runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
857 const TargetLowering *TL,
858 const DataLayout &DL, ProfileSummaryInfo *PSI,
860 DomTreeUpdater *DTU) {
861 for (Instruction& I : BB) {
862 CallInst *CI = dyn_cast<CallInst>(&I);
863 if (!CI) {
864 continue;
865 }
867 if (TLI->getLibFunc(*CI, Func) &&
868 (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
869 expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU, Func == LibFunc_bcmp)) {
870 return true;
871 }
872 }
873 return false;
874}
875
877ExpandMemCmpPass::runImpl(Function &F, const TargetLibraryInfo *TLI,
879 const TargetLowering *TL, ProfileSummaryInfo *PSI,
881 std::optional<DomTreeUpdater> DTU;
882 if (DT)
883 DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
884
885 const DataLayout& DL = F.getParent()->getDataLayout();
886 bool MadeChanges = false;
887 for (auto BBIt = F.begin(); BBIt != F.end();) {
888 if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI, DTU ? &*DTU : nullptr)) {
889 MadeChanges = true;
890 // If changes were made, restart the function from the beginning, since
891 // the structure of the function was changed.
892 BBIt = F.begin();
893 } else {
894 ++BBIt;
895 }
896 }
897 if (MadeChanges)
898 for (BasicBlock &BB : F)
900 if (!MadeChanges)
901 return PreservedAnalyses::all();
904 return PA;
905}
906
907} // namespace
908
909char ExpandMemCmpPass::ID = 0;
910INITIALIZE_PASS_BEGIN(ExpandMemCmpPass, "expandmemcmp",
911 "Expand memcmp() to load/stores", false, false)
918 "Expand memcmp() to load/stores", false, false)
919
921 return new ExpandMemCmpPass();
922}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
assume Assume Builder
uint64_t Size
static bool runImpl(Function &F, const TargetLowering &TLI)
static cl::opt< unsigned > MaxLoadsPerMemcmpOptSize("max-loads-per-memcmp-opt-size", cl::Hidden, cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"))
Expand memcmp() to load/stores"
expandmemcmp
static cl::opt< unsigned > MaxLoadsPerMemcmp("max-loads-per-memcmp", cl::Hidden, cl::desc("Set maximum number of loads used in expanded memcmp"))
static cl::opt< unsigned > MemCmpEqZeroNumLoadsPerBlock("memcmp-num-loads-per-block", cl::Hidden, cl::init(1), cl::desc("The number of loads per basic block for inline expansion of " "memcmp that is only being compared against zero."))
hexagon widen stores
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:202
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:166
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:158
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
const Instruction & front() const
Definition: BasicBlock.h:338
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:105
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1357
This class represents a function call, abstracting a target machine's calling convention.
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:145
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
void applyUpdates(ArrayRef< DominatorTree::UpdateType > Updates)
Submit updates to all available trees.
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:314
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition: Pass.cpp:174
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:645
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:642
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2570
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:365
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:70
const BasicBlock * getParent() const
Definition: Instruction.h:90
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:74
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:82
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
Class to represent integer types.
Definition: DerivedTypes.h:40
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:339
This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.
static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)
Helper for client passes to set up the analysis usage on behalf of this pass.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:173
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:78
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
static IntegerType * getInt8Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:535
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:921
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1069
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1465
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:440
bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI)
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition: Local.cpp:717
void initializeExpandMemCmpPassPass(PassRegistry &)
FunctionPass * createExpandMemCmpPass()
@ Or
Bitwise or logical OR of integers.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Constant * ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, APInt Offset, const DataLayout &DL)
Return the value that a load from C with offset Offset would produce if it is constant and determinab...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Returns options for expansion of memcmp. IsZeroCmp is.