// LLVM 23.0.0git — LowerMemIntrinsics.cpp (source reconstructed from doxygen view)
//===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <cmath>
#include <limits>
#include <optional>
23
24#define DEBUG_TYPE "lower-mem-intrinsics"
25
26using namespace llvm;
27
28namespace llvm {
30}
31
32/// \returns \p Len urem \p OpSize, checking for optimization opportunities.
33/// \p OpSizeVal must be the integer value of the \c ConstantInt \p OpSize.
35 Value *OpSize, unsigned OpSizeVal) {
36 // For powers of 2, we can and by (OpSizeVal - 1) instead of using urem.
37 if (isPowerOf2_32(OpSizeVal))
38 return B.CreateAnd(Len, OpSizeVal - 1);
39 return B.CreateURem(Len, OpSize);
40}
41
42/// \returns (\p Len udiv \p OpSize) mul \p OpSize, checking for optimization
43/// opportunities.
44/// If \p RTLoopRemainder is provided, it must be the result of
45/// \c getRuntimeLoopRemainder() with the same arguments.
47 unsigned OpSizeVal,
48 Value *RTLoopRemainder = nullptr) {
49 if (!RTLoopRemainder)
50 RTLoopRemainder = getRuntimeLoopRemainder(B, Len, OpSize, OpSizeVal);
51 return B.CreateSub(Len, RTLoopRemainder);
52}
53
54namespace {
55/// Container for the return values of insertLoopExpansion.
56struct LoopExpansionInfo {
57 /// The instruction at the end of the main loop body.
58 Instruction *MainLoopIP = nullptr;
59
60 /// The unit index in the main loop body.
61 Value *MainLoopIndex = nullptr;
62
63 /// The instruction at the end of the residual loop body. Can be nullptr if no
64 /// residual is required.
65 Instruction *ResidualLoopIP = nullptr;
66
67 /// The unit index in the residual loop body. Can be nullptr if no residual is
68 /// required.
69 Value *ResidualLoopIndex = nullptr;
70};
71
72std::optional<uint64_t> getAverageMemOpLoopTripCount(const MemIntrinsic &I) {
74 return std::nullopt;
75 if (std::optional<Function::ProfileCount> EC =
76 I.getFunction()->getEntryCount();
77 !EC || !EC->getCount())
78 return std::nullopt;
79 if (const auto Len = I.getLengthInBytes())
80 return Len->getZExtValue();
81 uint64_t Total = 0;
83 getValueProfDataFromInst(I, InstrProfValueKind::IPVK_MemOPSize,
84 std::numeric_limits<uint32_t>::max(), Total);
85 if (!Total)
86 return std::nullopt;
87 uint64_t TripCount = 0;
88 for (const auto &P : ProfData)
89 TripCount += P.Count * P.Value;
90 return std::round(1.0 * TripCount / Total);
91}
92
93} // namespace
94
95/// Insert the control flow and loop counters for a memcpy/memset loop
96/// expansion.
97///
98/// This function inserts IR corresponding to the following C code before
99/// \p InsertBefore:
100/// \code
101/// LoopUnits = (Len / MainLoopStep) * MainLoopStep;
102/// ResidualUnits = Len - LoopUnits;
103/// MainLoopIndex = 0;
104/// if (LoopUnits > 0) {
105/// do {
106/// // MainLoopIP
107/// MainLoopIndex += MainLoopStep;
108/// } while (MainLoopIndex < LoopUnits);
109/// }
110/// for (size_t i = 0; i < ResidualUnits; i += ResidualLoopStep) {
111/// ResidualLoopIndex = LoopUnits + i;
112/// // ResidualLoopIP
113/// }
114/// \endcode
115///
116/// \p MainLoopStep and \p ResidualLoopStep determine by how many "units" the
117/// loop index is increased in each iteration of the main and residual loops,
118/// respectively. In most cases, the "unit" will be bytes, but larger units are
119/// useful for lowering memset.pattern.
120///
121/// The computation of \c LoopUnits and \c ResidualUnits is performed at compile
122/// time if \p Len is a \c ConstantInt.
123/// The second (residual) loop is omitted if \p ResidualLoopStep is 0 or equal
124/// to \p MainLoopStep.
125/// The generated \c MainLoopIP, \c MainLoopIndex, \c ResidualLoopIP, and
126/// \c ResidualLoopIndex are returned in a \c LoopExpansionInfo object.
127static LoopExpansionInfo
129 unsigned MainLoopStep, unsigned ResidualLoopStep,
130 StringRef BBNamePrefix,
131 std::optional<uint64_t> AverageTripCount) {
132 assert((ResidualLoopStep == 0 || MainLoopStep % ResidualLoopStep == 0) &&
133 "ResidualLoopStep must divide MainLoopStep if specified");
134 assert(ResidualLoopStep <= MainLoopStep &&
135 "ResidualLoopStep cannot be larger than MainLoopStep");
136 assert(MainLoopStep > 0 && "MainLoopStep must be non-zero");
137 LoopExpansionInfo LEI;
138 BasicBlock *PreLoopBB = InsertBefore->getParent();
139 BasicBlock *PostLoopBB = PreLoopBB->splitBasicBlock(
140 InsertBefore, BBNamePrefix + "-post-expansion");
141 Function *ParentFunc = PreLoopBB->getParent();
142 LLVMContext &Ctx = PreLoopBB->getContext();
143 const DebugLoc &DbgLoc = InsertBefore->getStableDebugLoc();
144 IRBuilder<> PreLoopBuilder(PreLoopBB->getTerminator());
145 PreLoopBuilder.SetCurrentDebugLocation(DbgLoc);
146
147 // Calculate the main loop trip count and remaining units to cover after the
148 // loop.
149 Type *LenType = Len->getType();
150 IntegerType *ILenType = cast<IntegerType>(LenType);
151 ConstantInt *CIMainLoopStep = ConstantInt::get(ILenType, MainLoopStep);
152
153 Value *LoopUnits = Len;
154 Value *ResidualUnits = nullptr;
155 // We can make a conditional branch unconditional if we know that the
156 // MainLoop must be executed at least once.
157 bool MustTakeMainLoop = false;
158 if (MainLoopStep != 1) {
159 if (auto *CLen = dyn_cast<ConstantInt>(Len)) {
160 uint64_t TotalUnits = CLen->getZExtValue();
161 uint64_t LoopEndCount = alignDown(TotalUnits, MainLoopStep);
162 uint64_t ResidualCount = TotalUnits - LoopEndCount;
163 LoopUnits = ConstantInt::get(LenType, LoopEndCount);
164 ResidualUnits = ConstantInt::get(LenType, ResidualCount);
165 MustTakeMainLoop = LoopEndCount > 0;
166 // As an optimization, we could skip generating the residual loop if
167 // ResidualCount is known to be 0. However, current uses of this function
168 // don't request a residual loop if the length is constant (they generate
169 // a (potentially empty) sequence of loads and stores instead), so this
170 // optimization would have no effect here.
171 } else {
172 ResidualUnits = getRuntimeLoopRemainder(PreLoopBuilder, Len,
173 CIMainLoopStep, MainLoopStep);
174 LoopUnits = getRuntimeLoopUnits(PreLoopBuilder, Len, CIMainLoopStep,
175 MainLoopStep, ResidualUnits);
176 }
177 } else if (auto *CLen = dyn_cast<ConstantInt>(Len)) {
178 MustTakeMainLoop = CLen->getZExtValue() > 0;
179 }
180
181 BasicBlock *MainLoopBB = BasicBlock::Create(
182 Ctx, BBNamePrefix + "-expansion-main-body", ParentFunc, PostLoopBB);
183 IRBuilder<> LoopBuilder(MainLoopBB);
184 LoopBuilder.SetCurrentDebugLocation(DbgLoc);
185
186 PHINode *LoopIndex = LoopBuilder.CreatePHI(LenType, 2, "loop-index");
187 LEI.MainLoopIndex = LoopIndex;
188 LoopIndex->addIncoming(ConstantInt::get(LenType, 0U), PreLoopBB);
189
190 Value *NewIndex =
191 LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(LenType, MainLoopStep));
192 LoopIndex->addIncoming(NewIndex, MainLoopBB);
193
194 // One argument of the addition is a loop-variant PHI, so it must be an
195 // Instruction (i.e., it cannot be a Constant).
196 LEI.MainLoopIP = cast<Instruction>(NewIndex);
197
198 if (ResidualLoopStep > 0 && ResidualLoopStep < MainLoopStep) {
199 // Loop body for the residual accesses.
200 BasicBlock *ResLoopBB =
201 BasicBlock::Create(Ctx, BBNamePrefix + "-expansion-residual-body",
202 PreLoopBB->getParent(), PostLoopBB);
203 // BB to check if the residual loop is needed.
204 BasicBlock *ResidualCondBB =
205 BasicBlock::Create(Ctx, BBNamePrefix + "-expansion-residual-cond",
206 PreLoopBB->getParent(), ResLoopBB);
207
208 // Enter the MainLoop unless no main loop iteration is required.
209 ConstantInt *Zero = ConstantInt::get(ILenType, 0U);
210 if (MustTakeMainLoop)
211 PreLoopBuilder.CreateBr(MainLoopBB);
212 else {
213 auto *BR = PreLoopBuilder.CreateCondBr(
214 PreLoopBuilder.CreateICmpNE(LoopUnits, Zero), MainLoopBB,
215 ResidualCondBB);
216 if (AverageTripCount.has_value()) {
217 MDBuilder MDB(ParentFunc->getContext());
219 {AverageTripCount.value() % MainLoopStep, 1},
220 /*IsExpected=*/false);
221 } else {
223 }
224 }
225 PreLoopBB->getTerminator()->eraseFromParent();
226
227 // Stay in the MainLoop until we have handled all the LoopUnits. Then go to
228 // the residual condition BB.
229 LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopUnits),
230 MainLoopBB, ResidualCondBB);
231
232 // Determine if we need to branch to the residual loop or bypass it.
233 IRBuilder<> RCBuilder(ResidualCondBB);
234 RCBuilder.SetCurrentDebugLocation(DbgLoc);
235 RCBuilder.CreateCondBr(RCBuilder.CreateICmpNE(ResidualUnits, Zero),
236 ResLoopBB, PostLoopBB);
237
238 IRBuilder<> ResBuilder(ResLoopBB);
239 ResBuilder.SetCurrentDebugLocation(DbgLoc);
240 PHINode *ResidualIndex =
241 ResBuilder.CreatePHI(LenType, 2, "residual-loop-index");
242 ResidualIndex->addIncoming(Zero, ResidualCondBB);
243
244 // Add the offset at the end of the main loop to the loop counter of the
245 // residual loop to get the proper index.
246 Value *FullOffset = ResBuilder.CreateAdd(LoopUnits, ResidualIndex);
247 LEI.ResidualLoopIndex = FullOffset;
248
249 Value *ResNewIndex = ResBuilder.CreateAdd(
250 ResidualIndex, ConstantInt::get(LenType, ResidualLoopStep));
251 ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);
252
253 // One argument of the addition is a loop-variant PHI, so it must be an
254 // Instruction (i.e., it cannot be a Constant).
255 LEI.ResidualLoopIP = cast<Instruction>(ResNewIndex);
256
257 // Stay in the residual loop until all ResidualUnits are handled.
258 ResBuilder.CreateCondBr(
259 ResBuilder.CreateICmpULT(ResNewIndex, ResidualUnits), ResLoopBB,
260 PostLoopBB);
261 } else {
262 // There is no need for a residual loop after the main loop. We do however
263 // need to patch up the control flow by creating the terminators for the
264 // preloop block and the main loop.
265
266 // Enter the MainLoop unless no main loop iteration is required.
267 if (MustTakeMainLoop) {
268 PreLoopBuilder.CreateBr(MainLoopBB);
269 } else {
270 ConstantInt *Zero = ConstantInt::get(ILenType, 0U);
271 MDBuilder B(ParentFunc->getContext());
272 PreLoopBuilder.CreateCondBr(PreLoopBuilder.CreateICmpNE(LoopUnits, Zero),
273 MainLoopBB, PostLoopBB,
274 B.createLikelyBranchWeights());
275 }
276 PreLoopBB->getTerminator()->eraseFromParent();
277 // Stay in the MainLoop until we have handled all the LoopUnits.
278 auto *Br = LoopBuilder.CreateCondBr(
279 LoopBuilder.CreateICmpULT(NewIndex, LoopUnits), MainLoopBB, PostLoopBB);
280 if (AverageTripCount.has_value())
281 setFittedBranchWeights(*Br, {AverageTripCount.value() / MainLoopStep, 1},
282 /*IsExpected=*/false);
283 else
285 }
286 return LEI;
287}
288
290 Value *DstAddr, ConstantInt *CopyLen,
291 Align SrcAlign, Align DstAlign,
292 bool SrcIsVolatile, bool DstIsVolatile,
293 bool CanOverlap,
295 std::optional<uint32_t> AtomicElementSize,
296 std::optional<uint64_t> AverageTripCount) {
297 // No need to expand zero length copies.
298 if (CopyLen->isZero())
299 return;
300
301 BasicBlock *PreLoopBB = InsertBefore->getParent();
302 Function *ParentFunc = PreLoopBB->getParent();
303 LLVMContext &Ctx = PreLoopBB->getContext();
304 const DataLayout &DL = ParentFunc->getDataLayout();
305 MDBuilder MDB(Ctx);
306 MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
307 StringRef Name = "MemCopyAliasScope";
308 MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
309
310 unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
311 unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
312
313 Type *TypeOfCopyLen = CopyLen->getType();
314 Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
315 Ctx, CopyLen, SrcAS, DstAS, SrcAlign, DstAlign, AtomicElementSize);
316 assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
317 "Atomic memcpy lowering is not supported for vector operand type");
318
319 Type *Int8Type = Type::getInt8Ty(Ctx);
320 unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
321 assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
322 "Atomic memcpy lowering is not supported for selected operand size");
323
324 uint64_t LoopEndCount = alignDown(CopyLen->getZExtValue(), LoopOpSize);
325
326 // Skip the loop expansion entirely if the loop would never be taken.
327 if (LoopEndCount != 0) {
328 LoopExpansionInfo LEI =
329 insertLoopExpansion(InsertBefore, CopyLen, LoopOpSize, 0,
330 "static-memcpy", AverageTripCount);
331
332 // Fill MainLoopBB
333 IRBuilder<> MainLoopBuilder(LEI.MainLoopIP);
334 Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));
335 Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
336
337 // If we used LoopOpType as GEP element type, we would iterate over the
338 // buffers in TypeStoreSize strides while copying TypeAllocSize bytes, i.e.,
339 // we would miss bytes if TypeStoreSize != TypeAllocSize. Therefore, use
340 // byte offsets computed from the TypeStoreSize.
341 Value *SrcGEP =
342 MainLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr, LEI.MainLoopIndex);
343 LoadInst *Load = MainLoopBuilder.CreateAlignedLoad(
344 LoopOpType, SrcGEP, PartSrcAlign, SrcIsVolatile);
345 if (!CanOverlap) {
346 // Set alias scope for loads.
347 Load->setMetadata(LLVMContext::MD_alias_scope,
348 MDNode::get(Ctx, NewScope));
349 }
350 Value *DstGEP =
351 MainLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr, LEI.MainLoopIndex);
352 StoreInst *Store = MainLoopBuilder.CreateAlignedStore(
353 Load, DstGEP, PartDstAlign, DstIsVolatile);
354 if (!CanOverlap) {
355 // Indicate that stores don't overlap loads.
356 Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
357 }
358 if (AtomicElementSize) {
359 Load->setAtomic(AtomicOrdering::Unordered);
360 Store->setAtomic(AtomicOrdering::Unordered);
361 }
362 assert(!LEI.ResidualLoopIP && !LEI.ResidualLoopIndex &&
363 "No residual loop was requested");
364 }
365
366 // Copy the remaining bytes with straight-line code.
367 uint64_t BytesCopied = LoopEndCount;
368 uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied;
369 if (RemainingBytes == 0)
370 return;
371
372 IRBuilder<> RBuilder(InsertBefore);
373 SmallVector<Type *, 5> RemainingOps;
374 TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
375 SrcAS, DstAS, SrcAlign, DstAlign,
376 AtomicElementSize);
377
378 for (auto *OpTy : RemainingOps) {
379 Align PartSrcAlign(commonAlignment(SrcAlign, BytesCopied));
380 Align PartDstAlign(commonAlignment(DstAlign, BytesCopied));
381
382 unsigned OperandSize = DL.getTypeStoreSize(OpTy);
383 assert((!AtomicElementSize || OperandSize % *AtomicElementSize == 0) &&
384 "Atomic memcpy lowering is not supported for selected operand size");
385
386 Value *SrcGEP = RBuilder.CreateInBoundsGEP(
387 Int8Type, SrcAddr, ConstantInt::get(TypeOfCopyLen, BytesCopied));
388 LoadInst *Load =
389 RBuilder.CreateAlignedLoad(OpTy, SrcGEP, PartSrcAlign, SrcIsVolatile);
390 if (!CanOverlap) {
391 // Set alias scope for loads.
392 Load->setMetadata(LLVMContext::MD_alias_scope,
393 MDNode::get(Ctx, NewScope));
394 }
395 Value *DstGEP = RBuilder.CreateInBoundsGEP(
396 Int8Type, DstAddr, ConstantInt::get(TypeOfCopyLen, BytesCopied));
397 StoreInst *Store =
398 RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
399 if (!CanOverlap) {
400 // Indicate that stores don't overlap loads.
401 Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
402 }
403 if (AtomicElementSize) {
404 Load->setAtomic(AtomicOrdering::Unordered);
405 Store->setAtomic(AtomicOrdering::Unordered);
406 }
407 BytesCopied += OperandSize;
408 }
409 assert(BytesCopied == CopyLen->getZExtValue() &&
410 "Bytes copied should match size in the call!");
411}
412
414 Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
415 Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile,
416 bool CanOverlap, const TargetTransformInfo &TTI,
417 std::optional<uint32_t> AtomicElementSize,
418 std::optional<uint64_t> AverageTripCount) {
419 BasicBlock *PreLoopBB = InsertBefore->getParent();
420 Function *ParentFunc = PreLoopBB->getParent();
421 const DataLayout &DL = ParentFunc->getDataLayout();
422 LLVMContext &Ctx = PreLoopBB->getContext();
423 MDBuilder MDB(Ctx);
424 MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
425 StringRef Name = "MemCopyAliasScope";
426 MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
427
428 unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
429 unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
430
431 Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
432 Ctx, CopyLen, SrcAS, DstAS, SrcAlign, DstAlign, AtomicElementSize);
433 assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
434 "Atomic memcpy lowering is not supported for vector operand type");
435 unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
436 assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
437 "Atomic memcpy lowering is not supported for selected operand size");
438
439 Type *Int8Type = Type::getInt8Ty(Ctx);
440
441 Type *ResidualLoopOpType = AtomicElementSize
442 ? Type::getIntNTy(Ctx, *AtomicElementSize * 8)
443 : Int8Type;
444 unsigned ResidualLoopOpSize = DL.getTypeStoreSize(ResidualLoopOpType);
445 assert(ResidualLoopOpSize == (AtomicElementSize ? *AtomicElementSize : 1) &&
446 "Store size is expected to match type size");
447
448 LoopExpansionInfo LEI =
449 insertLoopExpansion(InsertBefore, CopyLen, LoopOpSize, ResidualLoopOpSize,
450 "dynamic-memcpy", AverageTripCount);
451
452 // Fill MainLoopBB
453 IRBuilder<> MainLoopBuilder(LEI.MainLoopIP);
454 Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
455 Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));
456
457 // If we used LoopOpType as GEP element type, we would iterate over the
458 // buffers in TypeStoreSize strides while copying TypeAllocSize bytes, i.e.,
459 // we would miss bytes if TypeStoreSize != TypeAllocSize. Therefore, use byte
460 // offsets computed from the TypeStoreSize.
461 Value *SrcGEP =
462 MainLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr, LEI.MainLoopIndex);
463 LoadInst *Load = MainLoopBuilder.CreateAlignedLoad(
464 LoopOpType, SrcGEP, PartSrcAlign, SrcIsVolatile);
465 if (!CanOverlap) {
466 // Set alias scope for loads.
467 Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope));
468 }
469 Value *DstGEP =
470 MainLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr, LEI.MainLoopIndex);
471 StoreInst *Store = MainLoopBuilder.CreateAlignedStore(
472 Load, DstGEP, PartDstAlign, DstIsVolatile);
473 if (!CanOverlap) {
474 // Indicate that stores don't overlap loads.
475 Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
476 }
477 if (AtomicElementSize) {
480 }
481
482 // Fill ResidualLoopBB.
483 if (!LEI.ResidualLoopIP)
484 return;
485
486 Align ResSrcAlign(commonAlignment(PartSrcAlign, ResidualLoopOpSize));
487 Align ResDstAlign(commonAlignment(PartDstAlign, ResidualLoopOpSize));
488
489 IRBuilder<> ResLoopBuilder(LEI.ResidualLoopIP);
490 Value *ResSrcGEP = ResLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr,
491 LEI.ResidualLoopIndex);
492 LoadInst *ResLoad = ResLoopBuilder.CreateAlignedLoad(
493 ResidualLoopOpType, ResSrcGEP, ResSrcAlign, SrcIsVolatile);
494 if (!CanOverlap) {
495 // Set alias scope for loads.
496 ResLoad->setMetadata(LLVMContext::MD_alias_scope,
497 MDNode::get(Ctx, NewScope));
498 }
499 Value *ResDstGEP = ResLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr,
500 LEI.ResidualLoopIndex);
501 StoreInst *ResStore = ResLoopBuilder.CreateAlignedStore(
502 ResLoad, ResDstGEP, ResDstAlign, DstIsVolatile);
503 if (!CanOverlap) {
504 // Indicate that stores don't overlap loads.
505 ResStore->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
506 }
507 if (AtomicElementSize) {
510 }
511}
512
513// If \p Addr1 and \p Addr2 are pointers to different address spaces, create an
514// addresspacecast to obtain a pair of pointers in the same addressspace. The
515// caller needs to ensure that addrspacecasting is possible.
516// No-op if the pointers are in the same address space.
517static std::pair<Value *, Value *>
519 const TargetTransformInfo &TTI) {
520 Value *ResAddr1 = Addr1;
521 Value *ResAddr2 = Addr2;
522
523 unsigned AS1 = cast<PointerType>(Addr1->getType())->getAddressSpace();
524 unsigned AS2 = cast<PointerType>(Addr2->getType())->getAddressSpace();
525 if (AS1 != AS2) {
526 if (TTI.isValidAddrSpaceCast(AS2, AS1))
527 ResAddr2 = B.CreateAddrSpaceCast(Addr2, Addr1->getType());
528 else if (TTI.isValidAddrSpaceCast(AS1, AS2))
529 ResAddr1 = B.CreateAddrSpaceCast(Addr1, Addr2->getType());
530 else
531 llvm_unreachable("Can only lower memmove between address spaces if they "
532 "support addrspacecast");
533 }
534 return {ResAddr1, ResAddr2};
535}
536
537// Lower memmove to IR. memmove is required to correctly copy overlapping memory
538// regions; therefore, it has to check the relative positions of the source and
539// destination pointers and choose the copy direction accordingly.
540//
541// The code below is an IR rendition of this C function:
542//
543// void* memmove(void* dst, const void* src, size_t n) {
544// unsigned char* d = dst;
545// const unsigned char* s = src;
546// if (s < d) {
547// // copy backwards
548// while (n--) {
549// d[n] = s[n];
550// }
551// } else {
552// // copy forward
553// for (size_t i = 0; i < n; ++i) {
554// d[i] = s[i];
555// }
556// }
557// return dst;
558// }
559//
560// If the TargetTransformInfo specifies a wider MemcpyLoopLoweringType, it is
561// used for the memory accesses in the loops. Then, additional loops with
562// byte-wise accesses are added for the remaining bytes.
564 Value *SrcAddr, Value *DstAddr,
565 Value *CopyLen, Align SrcAlign,
566 Align DstAlign, bool SrcIsVolatile,
567 bool DstIsVolatile,
568 const TargetTransformInfo &TTI) {
569 Type *TypeOfCopyLen = CopyLen->getType();
570 BasicBlock *OrigBB = InsertBefore->getParent();
571 Function *F = OrigBB->getParent();
572 const DataLayout &DL = F->getDataLayout();
573 LLVMContext &Ctx = OrigBB->getContext();
574 unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
575 unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
576
577 Type *LoopOpType = TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAS, DstAS,
578 SrcAlign, DstAlign);
579 unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
580 Type *Int8Type = Type::getInt8Ty(Ctx);
581 bool LoopOpIsInt8 = LoopOpType == Int8Type;
582
583 // If the memory accesses are wider than one byte, residual loops with
584 // i8-accesses are required to move remaining bytes.
585 bool RequiresResidual = !LoopOpIsInt8;
586
587 Type *ResidualLoopOpType = Int8Type;
588 unsigned ResidualLoopOpSize = DL.getTypeStoreSize(ResidualLoopOpType);
589
590 // Calculate the loop trip count and remaining bytes to copy after the loop.
591 IntegerType *ILengthType = cast<IntegerType>(TypeOfCopyLen);
592 ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
593 ConstantInt *CIResidualLoopOpSize =
594 ConstantInt::get(ILengthType, ResidualLoopOpSize);
595 ConstantInt *Zero = ConstantInt::get(ILengthType, 0);
596
597 const DebugLoc &DbgLoc = InsertBefore->getStableDebugLoc();
598 IRBuilder<> PLBuilder(InsertBefore);
599 PLBuilder.SetCurrentDebugLocation(DbgLoc);
600
601 Value *RuntimeLoopBytes = CopyLen;
602 Value *RuntimeLoopRemainder = nullptr;
603 Value *SkipResidualCondition = nullptr;
604 if (RequiresResidual) {
605 RuntimeLoopRemainder =
606 getRuntimeLoopRemainder(PLBuilder, CopyLen, CILoopOpSize, LoopOpSize);
607 RuntimeLoopBytes = getRuntimeLoopUnits(PLBuilder, CopyLen, CILoopOpSize,
608 LoopOpSize, RuntimeLoopRemainder);
609 SkipResidualCondition =
610 PLBuilder.CreateICmpEQ(RuntimeLoopRemainder, Zero, "skip_residual");
611 }
612 Value *SkipMainCondition =
613 PLBuilder.CreateICmpEQ(RuntimeLoopBytes, Zero, "skip_main");
614
615 // Create the a comparison of src and dst, based on which we jump to either
616 // the forward-copy part of the function (if src >= dst) or the backwards-copy
617 // part (if src < dst).
618 // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else
619 // structure. Its block terminators (unconditional branches) are replaced by
620 // the appropriate conditional branches when the loop is built.
621 // If the pointers are in different address spaces, they need to be converted
622 // to a compatible one. Cases where memory ranges in the different address
623 // spaces cannot overlap are lowered as memcpy and not handled here.
624 auto [CmpSrcAddr, CmpDstAddr] =
625 tryInsertCastToCommonAddrSpace(PLBuilder, SrcAddr, DstAddr, TTI);
626 Value *PtrCompare =
627 PLBuilder.CreateICmpULT(CmpSrcAddr, CmpDstAddr, "compare_src_dst");
628 Instruction *ThenTerm, *ElseTerm;
629 SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore->getIterator(),
630 &ThenTerm, &ElseTerm);
631
632 // If the LoopOpSize is greater than 1, each part of the function consists of
633 // four blocks:
634 // memmove_copy_backwards:
635 // skip the residual loop when 0 iterations are required
636 // memmove_bwd_residual_loop:
637 // copy the last few bytes individually so that the remaining length is
638 // a multiple of the LoopOpSize
639 // memmove_bwd_middle: skip the main loop when 0 iterations are required
640 // memmove_bwd_main_loop: the actual backwards loop BB with wide accesses
641 // memmove_copy_forward: skip the main loop when 0 iterations are required
642 // memmove_fwd_main_loop: the actual forward loop BB with wide accesses
643 // memmove_fwd_middle: skip the residual loop when 0 iterations are required
644 // memmove_fwd_residual_loop: copy the last few bytes individually
645 //
646 // The main and residual loop are switched between copying forward and
647 // backward so that the residual loop always operates on the end of the moved
648 // range. This is based on the assumption that buffers whose start is aligned
649 // with the LoopOpSize are more common than buffers whose end is.
650 //
651 // If the LoopOpSize is 1, each part of the function consists of two blocks:
652 // memmove_copy_backwards: skip the loop when 0 iterations are required
653 // memmove_bwd_main_loop: the actual backwards loop BB
654 // memmove_copy_forward: skip the loop when 0 iterations are required
655 // memmove_fwd_main_loop: the actual forward loop BB
656 BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
657 CopyBackwardsBB->setName("memmove_copy_backwards");
658 BasicBlock *CopyForwardBB = ElseTerm->getParent();
659 CopyForwardBB->setName("memmove_copy_forward");
660 BasicBlock *ExitBB = InsertBefore->getParent();
661 ExitBB->setName("memmove_done");
662
663 Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
664 Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));
665
666 // Accesses in the residual loops do not share the same alignment as those in
667 // the main loops.
668 Align ResidualSrcAlign(commonAlignment(PartSrcAlign, ResidualLoopOpSize));
669 Align ResidualDstAlign(commonAlignment(PartDstAlign, ResidualLoopOpSize));
670
671 // Copying backwards.
672 {
673 BasicBlock *MainLoopBB = BasicBlock::Create(
674 F->getContext(), "memmove_bwd_main_loop", F, CopyForwardBB);
675
676 // The predecessor of the memmove_bwd_main_loop. Updated in the
677 // following if a residual loop is emitted first.
678 BasicBlock *PredBB = CopyBackwardsBB;
679
680 if (RequiresResidual) {
681 // backwards residual loop
682 BasicBlock *ResidualLoopBB = BasicBlock::Create(
683 F->getContext(), "memmove_bwd_residual_loop", F, MainLoopBB);
684 IRBuilder<> ResidualLoopBuilder(ResidualLoopBB);
685 ResidualLoopBuilder.SetCurrentDebugLocation(DbgLoc);
686 PHINode *ResidualLoopPhi = ResidualLoopBuilder.CreatePHI(ILengthType, 0);
687 Value *ResidualIndex = ResidualLoopBuilder.CreateSub(
688 ResidualLoopPhi, CIResidualLoopOpSize, "bwd_residual_index");
689 // If we used LoopOpType as GEP element type, we would iterate over the
690 // buffers in TypeStoreSize strides while copying TypeAllocSize bytes,
691 // i.e., we would miss bytes if TypeStoreSize != TypeAllocSize. Therefore,
692 // use byte offsets computed from the TypeStoreSize.
693 Value *LoadGEP = ResidualLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr,
694 ResidualIndex);
695 Value *Element = ResidualLoopBuilder.CreateAlignedLoad(
696 ResidualLoopOpType, LoadGEP, ResidualSrcAlign, SrcIsVolatile,
697 "element");
698 Value *StoreGEP = ResidualLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr,
699 ResidualIndex);
700 ResidualLoopBuilder.CreateAlignedStore(Element, StoreGEP,
701 ResidualDstAlign, DstIsVolatile);
702
703 // After the residual loop, go to an intermediate block.
704 BasicBlock *IntermediateBB = BasicBlock::Create(
705 F->getContext(), "memmove_bwd_middle", F, MainLoopBB);
706 // Later code expects a terminator in the PredBB.
707 IRBuilder<> IntermediateBuilder(IntermediateBB);
708 IntermediateBuilder.SetCurrentDebugLocation(DbgLoc);
709 IntermediateBuilder.CreateUnreachable();
710 ResidualLoopBuilder.CreateCondBr(
711 ResidualLoopBuilder.CreateICmpEQ(ResidualIndex, RuntimeLoopBytes),
712 IntermediateBB, ResidualLoopBB);
713
714 ResidualLoopPhi->addIncoming(ResidualIndex, ResidualLoopBB);
715 ResidualLoopPhi->addIncoming(CopyLen, CopyBackwardsBB);
716
717 // How to get to the residual:
718 BranchInst *BrInst =
719 BranchInst::Create(IntermediateBB, ResidualLoopBB,
720 SkipResidualCondition, ThenTerm->getIterator());
721 BrInst->setDebugLoc(DbgLoc);
722 ThenTerm->eraseFromParent();
723
724 PredBB = IntermediateBB;
725 }
726
727 // main loop
728 IRBuilder<> MainLoopBuilder(MainLoopBB);
729 MainLoopBuilder.SetCurrentDebugLocation(DbgLoc);
730 PHINode *MainLoopPhi = MainLoopBuilder.CreatePHI(ILengthType, 0);
731 Value *MainIndex =
732 MainLoopBuilder.CreateSub(MainLoopPhi, CILoopOpSize, "bwd_main_index");
733 Value *LoadGEP =
734 MainLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr, MainIndex);
735 Value *Element = MainLoopBuilder.CreateAlignedLoad(
736 LoopOpType, LoadGEP, PartSrcAlign, SrcIsVolatile, "element");
737 Value *StoreGEP =
738 MainLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr, MainIndex);
739 MainLoopBuilder.CreateAlignedStore(Element, StoreGEP, PartDstAlign,
740 DstIsVolatile);
741 MainLoopBuilder.CreateCondBr(MainLoopBuilder.CreateICmpEQ(MainIndex, Zero),
742 ExitBB, MainLoopBB);
743 MainLoopPhi->addIncoming(MainIndex, MainLoopBB);
744 MainLoopPhi->addIncoming(RuntimeLoopBytes, PredBB);
745
746 // How to get to the main loop:
747 Instruction *PredBBTerm = PredBB->getTerminator();
749 ExitBB, MainLoopBB, SkipMainCondition, PredBBTerm->getIterator());
750 BrInst->setDebugLoc(DbgLoc);
751 PredBBTerm->eraseFromParent();
752 }
753
754 // Copying forward.
755 // main loop
756 {
757 BasicBlock *MainLoopBB =
758 BasicBlock::Create(F->getContext(), "memmove_fwd_main_loop", F, ExitBB);
759 IRBuilder<> MainLoopBuilder(MainLoopBB);
760 MainLoopBuilder.SetCurrentDebugLocation(DbgLoc);
761 PHINode *MainLoopPhi =
762 MainLoopBuilder.CreatePHI(ILengthType, 0, "fwd_main_index");
763 Value *LoadGEP =
764 MainLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr, MainLoopPhi);
765 Value *Element = MainLoopBuilder.CreateAlignedLoad(
766 LoopOpType, LoadGEP, PartSrcAlign, SrcIsVolatile, "element");
767 Value *StoreGEP =
768 MainLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr, MainLoopPhi);
769 MainLoopBuilder.CreateAlignedStore(Element, StoreGEP, PartDstAlign,
770 DstIsVolatile);
771 Value *MainIndex = MainLoopBuilder.CreateAdd(MainLoopPhi, CILoopOpSize);
772 MainLoopPhi->addIncoming(MainIndex, MainLoopBB);
773 MainLoopPhi->addIncoming(Zero, CopyForwardBB);
774
775 Instruction *CopyFwdBBTerm = CopyForwardBB->getTerminator();
776 BasicBlock *SuccessorBB = ExitBB;
777 if (RequiresResidual)
778 SuccessorBB =
779 BasicBlock::Create(F->getContext(), "memmove_fwd_middle", F, ExitBB);
780
781 // leaving or staying in the main loop
782 MainLoopBuilder.CreateCondBr(
783 MainLoopBuilder.CreateICmpEQ(MainIndex, RuntimeLoopBytes), SuccessorBB,
784 MainLoopBB);
785
786 // getting in or skipping the main loop
787 BranchInst *BrInst =
788 BranchInst::Create(SuccessorBB, MainLoopBB, SkipMainCondition,
789 CopyFwdBBTerm->getIterator());
790 BrInst->setDebugLoc(DbgLoc);
791 CopyFwdBBTerm->eraseFromParent();
792
793 if (RequiresResidual) {
794 BasicBlock *IntermediateBB = SuccessorBB;
795 IRBuilder<> IntermediateBuilder(IntermediateBB);
796 IntermediateBuilder.SetCurrentDebugLocation(DbgLoc);
797 BasicBlock *ResidualLoopBB = BasicBlock::Create(
798 F->getContext(), "memmove_fwd_residual_loop", F, ExitBB);
799 IntermediateBuilder.CreateCondBr(SkipResidualCondition, ExitBB,
800 ResidualLoopBB);
801
802 // Residual loop
803 IRBuilder<> ResidualLoopBuilder(ResidualLoopBB);
804 ResidualLoopBuilder.SetCurrentDebugLocation(DbgLoc);
805 PHINode *ResidualLoopPhi =
806 ResidualLoopBuilder.CreatePHI(ILengthType, 0, "fwd_residual_index");
807 Value *LoadGEP = ResidualLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr,
808 ResidualLoopPhi);
809 Value *Element = ResidualLoopBuilder.CreateAlignedLoad(
810 ResidualLoopOpType, LoadGEP, ResidualSrcAlign, SrcIsVolatile,
811 "element");
812 Value *StoreGEP = ResidualLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr,
813 ResidualLoopPhi);
814 ResidualLoopBuilder.CreateAlignedStore(Element, StoreGEP,
815 ResidualDstAlign, DstIsVolatile);
816 Value *ResidualIndex =
817 ResidualLoopBuilder.CreateAdd(ResidualLoopPhi, CIResidualLoopOpSize);
818 ResidualLoopBuilder.CreateCondBr(
819 ResidualLoopBuilder.CreateICmpEQ(ResidualIndex, CopyLen), ExitBB,
820 ResidualLoopBB);
821 ResidualLoopPhi->addIncoming(ResidualIndex, ResidualLoopBB);
822 ResidualLoopPhi->addIncoming(RuntimeLoopBytes, IntermediateBB);
823 }
824 }
825}
826
// Similar to createMemMoveLoopUnknownSize, only the trip counts are computed at
// compile time, obsolete loops and branches are omitted, and the residual code
// is straight-line code instead of a loop.
//
// Emitted control-flow shape (branches/loops are dropped when their trip
// count is statically zero):
//
//   if (SrcAddr < DstAddr)  ->  backward residual, then backward main loop
//   else                    ->  forward main loop, then forward residual
//
// The main loop moves LoopOpSize bytes per iteration; the residual is a
// straight-line sequence of load/store pairs covering the remaining
// CopyLen % LoopOpSize bytes.
//
// \param InsertBefore Expansion is emitted before this instruction; its block
//        is split and the remainder becomes the exit block ("memmove_done").
// \param SrcAddr     Source pointer (address space may differ from DstAddr).
// \param DstAddr     Destination pointer.
// \param CopyLen     Compile-time constant number of bytes to move.
// \param SrcAlign    Known alignment of the source.
// \param DstAlign    Known alignment of the destination.
// \param SrcIsVolatile Emit volatile loads.
// \param DstIsVolatile Emit volatile stores.
// \param TTI         Supplies the per-target loop and residual operand types.
static void createMemMoveLoopKnownSize(Instruction *InsertBefore,
                                       Value *SrcAddr, Value *DstAddr,
                                       ConstantInt *CopyLen, Align SrcAlign,
                                       Align DstAlign, bool SrcIsVolatile,
                                       bool DstIsVolatile,
                                       const TargetTransformInfo &TTI) {
  // No need to expand zero length moves.
  if (CopyLen->isZero())
    return;

  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getDataLayout();
  LLVMContext &Ctx = OrigBB->getContext();
  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  // The widest type the target wants the main copy loop to operate on.
  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAS, DstAS,
                                                   SrcAlign, DstAlign);
  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  Type *Int8Type = Type::getInt8Ty(Ctx);

  // Calculate the loop trip count and remaining bytes to copy after the loop.
  uint64_t BytesCopiedInLoop = alignDown(CopyLen->getZExtValue(), LoopOpSize);
  uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopiedInLoop;

  IntegerType *ILengthType = cast<IntegerType>(TypeOfCopyLen);
  ConstantInt *Zero = ConstantInt::get(ILengthType, 0);
  ConstantInt *LoopBound = ConstantInt::get(ILengthType, BytesCopiedInLoop);
  ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);

  const DebugLoc &DbgLoc = InsertBefore->getStableDebugLoc();
  IRBuilder<> PLBuilder(InsertBefore);
  PLBuilder.SetCurrentDebugLocation(DbgLoc);

  // The copy direction depends on the (runtime) pointer order; cast both
  // pointers into one address space first if they differ.
  auto [CmpSrcAddr, CmpDstAddr] =
      tryInsertCastToCommonAddrSpace(PLBuilder, SrcAddr, DstAddr, TTI);
  Value *PtrCompare =
      PLBuilder.CreateICmpULT(CmpSrcAddr, CmpDstAddr, "compare_src_dst");
  Instruction *ThenTerm, *ElseTerm;
  SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore->getIterator(),
                                &ThenTerm, &ElseTerm);

  // Then-branch: Src < Dst, so the regions may overlap at the high end of Src
  // and we must copy backwards. Else-branch: copy forwards.
  BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
  BasicBlock *CopyForwardBB = ElseTerm->getParent();
  BasicBlock *ExitBB = InsertBefore->getParent();
  ExitBB->setName("memmove_done");

  Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
  Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));

  // Helper function to generate a load/store pair of a given type in the
  // residual. Used in the forward and backward branches. BytesCopied is both
  // the byte offset of the pair and the running total, advanced by the store
  // size of OpTy.
  auto GenerateResidualLdStPair = [&](Type *OpTy, IRBuilderBase &Builder,
                                      uint64_t &BytesCopied) {
    Align ResSrcAlign(commonAlignment(SrcAlign, BytesCopied));
    Align ResDstAlign(commonAlignment(DstAlign, BytesCopied));

    unsigned OperandSize = DL.getTypeStoreSize(OpTy);

    // If we used LoopOpType as GEP element type, we would iterate over the
    // buffers in TypeStoreSize strides while copying TypeAllocSize bytes, i.e.,
    // we would miss bytes if TypeStoreSize != TypeAllocSize. Therefore, use
    // byte offsets computed from the TypeStoreSize.
    Value *SrcGEP = Builder.CreateInBoundsGEP(
        Int8Type, SrcAddr, ConstantInt::get(TypeOfCopyLen, BytesCopied));
    LoadInst *Load =
        Builder.CreateAlignedLoad(OpTy, SrcGEP, ResSrcAlign, SrcIsVolatile);
    Value *DstGEP = Builder.CreateInBoundsGEP(
        Int8Type, DstAddr, ConstantInt::get(TypeOfCopyLen, BytesCopied));
    Builder.CreateAlignedStore(Load, DstGEP, ResDstAlign, DstIsVolatile);
    BytesCopied += OperandSize;
  };

  // Copying backwards: the residual covers the *highest* addresses and must
  // therefore run before the backward main loop.
  if (RemainingBytes != 0) {
    CopyBackwardsBB->setName("memmove_bwd_residual");
    uint64_t BytesCopied = BytesCopiedInLoop;

    // Residual code is required to move the remaining bytes. We need the same
    // instructions as in the forward case, only in reverse. So we generate code
    // the same way, except that we change the IRBuilder insert point for each
    // load/store pair so that each one is inserted before the previous one
    // instead of after it.
    IRBuilder<> BwdResBuilder(CopyBackwardsBB,
                              CopyBackwardsBB->getFirstNonPHIIt());
    BwdResBuilder.SetCurrentDebugLocation(DbgLoc);
    SmallVector<Type *, 5> RemainingOps;
    TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
                                          SrcAS, DstAS, PartSrcAlign,
                                          PartDstAlign);
    for (auto *OpTy : RemainingOps) {
      // reverse the order of the emitted operations
      BwdResBuilder.SetInsertPoint(CopyBackwardsBB,
                                   CopyBackwardsBB->getFirstNonPHIIt());
      GenerateResidualLdStPair(OpTy, BwdResBuilder, BytesCopied);
    }
  }
  if (BytesCopiedInLoop != 0) {
    BasicBlock *LoopBB = CopyBackwardsBB;
    BasicBlock *PredBB = OrigBB;
    if (RemainingBytes != 0) {
      // if we introduce residual code, it needs its separate BB
      LoopBB = CopyBackwardsBB->splitBasicBlock(
          CopyBackwardsBB->getTerminator(), "memmove_bwd_loop");
      PredBB = CopyBackwardsBB;
    } else {
      CopyBackwardsBB->setName("memmove_bwd_loop");
    }
    // Backward main loop: the phi starts at LoopBound and is decremented by
    // LoopOpSize before each copy, so Index walks LoopBound-LoopOpSize .. 0.
    IRBuilder<> LoopBuilder(LoopBB->getTerminator());
    LoopBuilder.SetCurrentDebugLocation(DbgLoc);
    PHINode *LoopPhi = LoopBuilder.CreatePHI(ILengthType, 0);
    Value *Index = LoopBuilder.CreateSub(LoopPhi, CILoopOpSize, "bwd_index");
    Value *LoadGEP = LoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr, Index);
    Value *Element = LoopBuilder.CreateAlignedLoad(
        LoopOpType, LoadGEP, PartSrcAlign, SrcIsVolatile, "element");
    Value *StoreGEP = LoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr, Index);
    LoopBuilder.CreateAlignedStore(Element, StoreGEP, PartDstAlign,
                                   DstIsVolatile);

    // Replace the unconditional branch introduced by
    // SplitBlockAndInsertIfThenElse to turn LoopBB into a loop.
    Instruction *UncondTerm = LoopBB->getTerminator();
    LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpEQ(Index, Zero), ExitBB,
                             LoopBB);
    UncondTerm->eraseFromParent();

    LoopPhi->addIncoming(Index, LoopBB);
    LoopPhi->addIncoming(LoopBound, PredBB);
  }

  // Copying forward. The main loop runs first; the residual (if any) covers
  // the trailing bytes and lives in its own block after the loop.
  BasicBlock *FwdResidualBB = CopyForwardBB;
  if (BytesCopiedInLoop != 0) {
    CopyForwardBB->setName("memmove_fwd_loop");
    BasicBlock *LoopBB = CopyForwardBB;
    BasicBlock *SuccBB = ExitBB;
    if (RemainingBytes != 0) {
      // if we introduce residual code, it needs its separate BB
      SuccBB = CopyForwardBB->splitBasicBlock(CopyForwardBB->getTerminator(),
                                              "memmove_fwd_residual");
      FwdResidualBB = SuccBB;
    }
    // Forward main loop: phi counts 0 .. LoopBound in LoopOpSize steps.
    IRBuilder<> LoopBuilder(LoopBB->getTerminator());
    LoopBuilder.SetCurrentDebugLocation(DbgLoc);
    PHINode *LoopPhi = LoopBuilder.CreatePHI(ILengthType, 0, "fwd_index");
    Value *LoadGEP = LoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr, LoopPhi);
    Value *Element = LoopBuilder.CreateAlignedLoad(
        LoopOpType, LoadGEP, PartSrcAlign, SrcIsVolatile, "element");
    Value *StoreGEP = LoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr, LoopPhi);
    LoopBuilder.CreateAlignedStore(Element, StoreGEP, PartDstAlign,
                                   DstIsVolatile);
    Value *Index = LoopBuilder.CreateAdd(LoopPhi, CILoopOpSize);
    LoopPhi->addIncoming(Index, LoopBB);
    LoopPhi->addIncoming(Zero, OrigBB);

    // Replace the unconditional branch to turn LoopBB into a loop.
    Instruction *UncondTerm = LoopBB->getTerminator();
    LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpEQ(Index, LoopBound), SuccBB,
                             LoopBB);
    UncondTerm->eraseFromParent();
  }

  if (RemainingBytes != 0) {
    uint64_t BytesCopied = BytesCopiedInLoop;

    // Residual code is required to move the remaining bytes. In the forward
    // case, we emit it in the normal order.
    IRBuilder<> FwdResBuilder(FwdResidualBB->getTerminator());
    FwdResBuilder.SetCurrentDebugLocation(DbgLoc);
    SmallVector<Type *, 5> RemainingOps;
    TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
                                          SrcAS, DstAS, PartSrcAlign,
                                          PartDstAlign);
    for (auto *OpTy : RemainingOps)
      GenerateResidualLdStPair(OpTy, FwdResBuilder, BytesCopied);
  }
}
1009
1010static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
1011 Value *CopyLen, Value *SetValue, Align DstAlign,
1012 std::optional<uint64_t> AverageTripCount,
1013 bool IsVolatile) {
1014 Type *TypeOfCopyLen = CopyLen->getType();
1015 BasicBlock *OrigBB = InsertBefore->getParent();
1016 Function *F = OrigBB->getParent();
1017 const DataLayout &DL = F->getDataLayout();
1018 BasicBlock *NewBB =
1019 OrigBB->splitBasicBlock(InsertBefore, "split");
1020 BasicBlock *LoopBB
1021 = BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB);
1022
1023 const DebugLoc &DbgLoc = InsertBefore->getStableDebugLoc();
1024 IRBuilder<> Builder(OrigBB->getTerminator());
1025 Builder.SetCurrentDebugLocation(DbgLoc);
1026
1027 auto *ToLoopBR = Builder.CreateCondBr(
1028 Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
1029 LoopBB);
1030 MDBuilder MDB(F->getContext());
1031 if (AverageTripCount.has_value())
1032 ToLoopBR->setMetadata(LLVMContext::MD_prof,
1034 else
1036
1037 OrigBB->getTerminator()->eraseFromParent();
1038
1039 unsigned PartSize = DL.getTypeStoreSize(SetValue->getType());
1040 Align PartAlign(commonAlignment(DstAlign, PartSize));
1041
1042 IRBuilder<> LoopBuilder(LoopBB);
1043 LoopBuilder.SetCurrentDebugLocation(DbgLoc);
1044 PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
1045 LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);
1046
1047 LoopBuilder.CreateAlignedStore(
1048 SetValue,
1049 LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex),
1050 PartAlign, IsVolatile);
1051
1052 Value *NewIndex =
1053 LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
1054 LoopIndex->addIncoming(NewIndex, LoopBB);
1055
1056 auto *LoopBR = LoopBuilder.CreateCondBr(
1057 LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB, NewBB);
1058 if (AverageTripCount.has_value())
1059 setFittedBranchWeights(*LoopBR, {AverageTripCount.value(), 1},
1060 /*IsExpected=*/false);
1061 else
1063}
1064
1065template <typename T>
1067 if (SE) {
1068 const SCEV *SrcSCEV = SE->getSCEV(Memcpy->getRawSource());
1069 const SCEV *DestSCEV = SE->getSCEV(Memcpy->getRawDest());
1070 if (SE->isKnownPredicateAt(CmpInst::ICMP_NE, SrcSCEV, DestSCEV, Memcpy))
1071 return false;
1072 }
1073 return true;
1074}
1075
1077 const TargetTransformInfo &TTI,
1078 ScalarEvolution *SE) {
1079 bool CanOverlap = canOverlap(Memcpy, SE);
1080 auto TripCount = getAverageMemOpLoopTripCount(*Memcpy);
1081 if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
1083 /* InsertBefore */ Memcpy,
1084 /* SrcAddr */ Memcpy->getRawSource(),
1085 /* DstAddr */ Memcpy->getRawDest(),
1086 /* CopyLen */ CI,
1087 /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
1088 /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
1089 /* SrcIsVolatile */ Memcpy->isVolatile(),
1090 /* DstIsVolatile */ Memcpy->isVolatile(),
1091 /* CanOverlap */ CanOverlap,
1092 /* TargetTransformInfo */ TTI,
1093 /* AtomicElementSize */ std::nullopt,
1094 /* AverageTripCount */ TripCount);
1095 } else {
1097 /* InsertBefore */ Memcpy,
1098 /* SrcAddr */ Memcpy->getRawSource(),
1099 /* DstAddr */ Memcpy->getRawDest(),
1100 /* CopyLen */ Memcpy->getLength(),
1101 /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
1102 /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
1103 /* SrcIsVolatile */ Memcpy->isVolatile(),
1104 /* DstIsVolatile */ Memcpy->isVolatile(),
1105 /* CanOverlap */ CanOverlap,
1106 /* TargetTransformInfo */ TTI,
1107 /* AtomicElementSize */ std::nullopt,
1108 /* AverageTripCount */ TripCount);
1109 }
1110}
1111
1113 const TargetTransformInfo &TTI) {
1114 Value *CopyLen = Memmove->getLength();
1115 Value *SrcAddr = Memmove->getRawSource();
1116 Value *DstAddr = Memmove->getRawDest();
1117 Align SrcAlign = Memmove->getSourceAlign().valueOrOne();
1118 Align DstAlign = Memmove->getDestAlign().valueOrOne();
1119 bool SrcIsVolatile = Memmove->isVolatile();
1120 bool DstIsVolatile = SrcIsVolatile;
1121 IRBuilder<> CastBuilder(Memmove);
1122 CastBuilder.SetCurrentDebugLocation(Memmove->getStableDebugLoc());
1123
1124 unsigned SrcAS = SrcAddr->getType()->getPointerAddressSpace();
1125 unsigned DstAS = DstAddr->getType()->getPointerAddressSpace();
1126 if (SrcAS != DstAS) {
1127 if (!TTI.addrspacesMayAlias(SrcAS, DstAS)) {
1128 // We may not be able to emit a pointer comparison, but we don't have
1129 // to. Expand as memcpy.
1130 auto AverageTripCount = getAverageMemOpLoopTripCount(*Memmove);
1131 if (ConstantInt *CI = dyn_cast<ConstantInt>(CopyLen)) {
1133 /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CI, SrcAlign, DstAlign,
1134 SrcIsVolatile, DstIsVolatile,
1135 /*CanOverlap=*/false, TTI, std::nullopt, AverageTripCount);
1136 } else {
1138 /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CopyLen, SrcAlign,
1139 DstAlign, SrcIsVolatile, DstIsVolatile,
1140 /*CanOverlap=*/false, TTI, std::nullopt, AverageTripCount);
1141 }
1142
1143 return true;
1144 }
1145
1146 if (!(TTI.isValidAddrSpaceCast(DstAS, SrcAS) ||
1147 TTI.isValidAddrSpaceCast(SrcAS, DstAS))) {
1148 // We don't know generically if it's legal to introduce an
1149 // addrspacecast. We need to know either if it's legal to insert an
1150 // addrspacecast, or if the address spaces cannot alias.
1151 LLVM_DEBUG(
1152 dbgs() << "Do not know how to expand memmove between different "
1153 "address spaces\n");
1154 return false;
1155 }
1156 }
1157
1158 if (ConstantInt *CI = dyn_cast<ConstantInt>(CopyLen)) {
1160 /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CI, SrcAlign, DstAlign,
1161 SrcIsVolatile, DstIsVolatile, TTI);
1162 } else {
1164 /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CopyLen, SrcAlign, DstAlign,
1165 SrcIsVolatile, DstIsVolatile, TTI);
1166 }
1167 return true;
1168}
1169
1171 createMemSetLoop(/* InsertBefore */ Memset,
1172 /* DstAddr */ Memset->getRawDest(),
1173 /* CopyLen */ Memset->getLength(),
1174 /* SetValue */ Memset->getValue(),
1175 /* Alignment */ Memset->getDestAlign().valueOrOne(),
1176 /* AverageTripCount */ getAverageMemOpLoopTripCount(*Memset),
1177 /* IsVolatile */ Memset->isVolatile());
1178}
1179
1181 createMemSetLoop(/* InsertBefore=*/Memset,
1182 /* DstAddr=*/Memset->getRawDest(),
1183 /* CopyLen=*/Memset->getLength(),
1184 /* SetValue=*/Memset->getValue(),
1185 /* Alignment=*/Memset->getDestAlign().valueOrOne(),
1186 /* AverageTripCount */ getAverageMemOpLoopTripCount(*Memset),
1187 /* IsVolatile */ Memset->isVolatile());
1188}
1189
1191 const TargetTransformInfo &TTI,
1192 ScalarEvolution *SE) {
1193 assert(AtomicMemcpy->isAtomic());
1194 if (ConstantInt *CI = dyn_cast<ConstantInt>(AtomicMemcpy->getLength())) {
1196 /* InsertBefore */ AtomicMemcpy,
1197 /* SrcAddr */ AtomicMemcpy->getRawSource(),
1198 /* DstAddr */ AtomicMemcpy->getRawDest(),
1199 /* CopyLen */ CI,
1200 /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
1201 /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
1202 /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
1203 /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
1204 /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
1205 /* TargetTransformInfo */ TTI,
1206 /* AtomicElementSize */ AtomicMemcpy->getElementSizeInBytes());
1207 } else {
1209 /* InsertBefore */ AtomicMemcpy,
1210 /* SrcAddr */ AtomicMemcpy->getRawSource(),
1211 /* DstAddr */ AtomicMemcpy->getRawDest(),
1212 /* CopyLen */ AtomicMemcpy->getLength(),
1213 /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
1214 /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
1215 /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
1216 /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
1217 /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
1218 /* TargetTransformInfo */ TTI,
1219 /* AtomicElementSize */ AtomicMemcpy->getElementSizeInBytes());
1220 }
1221}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static void SetValue(Value *V, GenericValue Val, ExecutionContext &SF)
Definition Execution.cpp:41
#define DEBUG_TYPE
static std::pair< Value *, Value * > tryInsertCastToCommonAddrSpace(IRBuilderBase &B, Value *Addr1, Value *Addr2, const TargetTransformInfo &TTI)
static bool canOverlap(MemTransferBase< T > *Memcpy, ScalarEvolution *SE)
static LoopExpansionInfo insertLoopExpansion(Instruction *InsertBefore, Value *Len, unsigned MainLoopStep, unsigned ResidualLoopStep, StringRef BBNamePrefix, std::optional< uint64_t > AverageTripCount)
Insert the control flow and loop counters for a memcpy/memset loop expansion.
static void createMemMoveLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile, const TargetTransformInfo &TTI)
static Value * getRuntimeLoopRemainder(IRBuilderBase &B, Value *Len, Value *OpSize, unsigned OpSizeVal)
static Value * getRuntimeLoopUnits(IRBuilderBase &B, Value *Len, Value *OpSize, unsigned OpSizeVal, Value *RTLoopRemainder=nullptr)
static void createMemMoveLoopUnknownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile, const TargetTransformInfo &TTI)
static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr, Value *CopyLen, Value *SetValue, Align DstAlign, std::optional< uint64_t > AverageTripCount, bool IsVolatile)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define P(N)
This file contains the declarations for profiling metadata utility functions.
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
This class represents any memcpy intrinsic i.e.
uint32_t getElementSizeInBytes() const
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
@ ICMP_NE
not equal
Definition InstrTypes.h:698
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:123
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition Function.cpp:362
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2318
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition IRBuilder.h:1872
UnreachableInst * CreateUnreachable()
Definition IRBuilder.h:1343
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition IRBuilder.h:1953
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2306
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition IRBuilder.h:2467
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2302
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1424
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1201
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1407
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1195
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition IRBuilder.h:1891
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2776
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the deb...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
MDNode * createAnonymousAliasScope(MDNode *Domain, StringRef Name=StringRef())
Return metadata appropriate for an alias scope root node.
Definition MDBuilder.h:181
LLVM_ABI MDNode * createLikelyBranchWeights()
Return metadata containing two branch weights, with significant bias towards true destination.
Definition MDBuilder.cpp:43
MDNode * createAnonymousAliasScopeDomain(StringRef Name=StringRef())
Return metadata appropriate for an alias scope domain node.
Definition MDBuilder.h:174
Metadata node.
Definition Metadata.h:1080
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
This class wraps the llvm.memcpy intrinsic.
Value * getLength() const
Value * getRawDest() const
MaybeAlign getDestAlign() const
This is the common base class for memset/memcpy/memmove.
bool isVolatile() const
This class wraps the llvm.memmove intrinsic.
Value * getValue() const
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
This class wraps the llvm.experimental.memset.pattern intrinsic.
Common base class for all memory transfer intrinsics.
Value * getRawSource() const
Return the arguments to the instruction.
MaybeAlign getSourceAlign() const
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI bool isKnownPredicateAt(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS, const Instruction *CtxI)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:294
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:397
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
LLVM_ABI void createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, ConstantInt *CopyLen, Align SrcAlign, Align DestAlign, bool SrcIsVolatile, bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI, std::optional< uint32_t > AtomicCpySize=std::nullopt, std::optional< uint64_t > AverageTripCount=std::nullopt)
Emit a loop implementing the semantics of an llvm.memcpy whose size is a compile time constant.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruction if the enclosing function is profiled.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void expandMemSetPatternAsLoop(MemSetPatternInst *MemSet)
Expand MemSetPattern as a loop. MemSet is not deleted.
LLVM_ABI bool expandMemMoveAsLoop(MemMoveInst *MemMove, const TargetTransformInfo &TTI)
Expand MemMove as a loop.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:546
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and returns them if Inst is annotated with value profile data.
TargetTransformInfo TTI
LLVM_ABI void expandAtomicMemCpyAsLoop(AnyMemCpyInst *AtomicMemCpy, const TargetTransformInfo &TTI, ScalarEvolution *SE)
Expand AtomicMemCpy as a loop. AtomicMemCpy is not deleted.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI, ScalarEvolution *SE=nullptr)
Expand MemCpy as a loop. MemCpy is not deleted.
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
Definition Metadata.cpp:64
LLVM_ABI void setFittedBranchWeights(Instruction &I, ArrayRef< uint64_t > Weights, bool IsExpected, bool ElideAllZero=false)
Variant of setBranchWeights where the Weights will be fit first to uint32_t by shifting right.
LLVM_ABI void createMemCpyLoopUnknownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen, Align SrcAlign, Align DestAlign, bool SrcIsVolatile, bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI, std::optional< unsigned > AtomicSize=std::nullopt, std::optional< uint64_t > AverageTripCount=std::nullopt)
Emit a loop implementing the semantics of llvm.memcpy where the size is not a compile-time constant.
LLVM_ABI void expandMemSetAsLoop(MemSetInst *MemSet)
Expand MemSet as a loop. MemSet is not deleted.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition Alignment.h:130