void llvm::createMemCpyLoopKnownSize(
    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr,
    ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile,
    bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI,
    std::optional<uint32_t> AtomicElementSize) {
  // ...
  BasicBlock *PreLoopBB = InsertBefore->getParent();
  Function *ParentFunc = PreLoopBB->getParent();
  LLVMContext &Ctx = PreLoopBB->getContext();
  const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
  MDBuilder MDB(Ctx);
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  // Let the target pick the widest legal operand type for the copy loop.
  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
      AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");

  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

  uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize;
  if (LoopEndCount != 0) {
    // ... (split the block and create the "load-store-loop" body)
    PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
    PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
    if (SrcAddr->getType() != SrcOpType) {
      SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
    }
    if (DstAddr->getType() != DstOpType) {
      DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
    }
    // ...
    LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                   PartSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    // ...
    StoreInst *Store = LoopBuilder.CreateAlignedStore(
        Load, DstGEP, PartDstAlign, DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
  uint64_t BytesCopied = LoopEndCount * LoopOpSize;
  uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied;
  if (RemainingBytes) {
    // ...
    // Ask the target which operand types to use for the leftover bytes.
    SmallVector<Type *, 5> RemainingOps;
    TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
                                          SrcAS, DstAS, SrcAlign.value(),
                                          DstAlign.value(), AtomicElementSize);

    for (auto *OpTy : RemainingOps) {
      // ...
      unsigned OperandSize = DL.getTypeStoreSize(OpTy);
      assert(
          (!AtomicElementSize || OperandSize % *AtomicElementSize == 0) &&
          "Atomic memcpy lowering is not supported for selected operand size");

      uint64_t GepIndex = BytesCopied / OperandSize;
      assert(GepIndex * OperandSize == BytesCopied &&
             "Division should have no Remainder!");

      // Cast the source to the operand type and load.
      PointerType *SrcPtrType = PointerType::get(OpTy, SrcAS);
      // ...
      if (!CanOverlap) {
        // Set alias scope for loads.
        Load->setMetadata(LLVMContext::MD_alias_scope,
                          MDNode::get(Ctx, NewScope));
      }
      // Cast the destination to the operand type and store.
      PointerType *DstPtrType = PointerType::get(OpTy, DstAS);
      // ...
      if (!CanOverlap) {
        // Indicate that stores don't overlap loads.
        Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
      }
      if (AtomicElementSize) {
        Load->setAtomic(AtomicOrdering::Unordered);
        Store->setAtomic(AtomicOrdering::Unordered);
      }
      BytesCopied += OperandSize;
    }
  }
  assert(BytesCopied == CopyLen->getZExtValue() &&
         "Bytes copied should match size in the call!");
}
void llvm::createMemCpyLoopUnknownSize(
    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
    Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile,
    bool CanOverlap, const TargetTransformInfo &TTI,
    std::optional<uint32_t> AtomicElementSize) {
  BasicBlock *PreLoopBB = InsertBefore->getParent();
  BasicBlock *PostLoopBB =
      PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion");

  Function *ParentFunc = PreLoopBB->getParent();
  const DataLayout &DL = ParentFunc->getParent()->getDataLayout();
  LLVMContext &Ctx = PreLoopBB->getContext();
  MDBuilder MDB(Ctx);
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
      AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");
  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");
  IRBuilder<> PLBuilder(PreLoopBB->getTerminator());

  PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
  PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
  if (SrcAddr->getType() != SrcOpType) {
    SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
  }
  if (DstAddr->getType() != DstOpType) {
    DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
  }

  // Calculate the loop trip count and remaining bytes to copy after the loop.
  Type *CopyLenType = CopyLen->getType();
  IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType);
  assert(ILengthType &&
         "expected size argument to memcpy to be an integer type!");
  Type *Int8Type = Type::getInt8Ty(Ctx);
  bool LoopOpIsInt8 = LoopOpType == Int8Type;
  ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
  Value *RuntimeLoopCount = LoopOpIsInt8
                                ? CopyLen
                                : PLBuilder.CreateUDiv(CopyLen, CILoopOpSize);
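The main-loop trip count is simply the quotient of the byte count by the operand width; the residual block further below computes the matching remainder. A host-side sketch with assumed example values (LoopOpSize == 4, a runtime length of 23 bytes):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t CopyLen = 23;   // runtime byte count (example value)
  uint64_t LoopOpSize = 4; // assumed 4-byte loop operand type
  uint64_t RuntimeLoopCount = CopyLen / LoopOpSize;        // CreateUDiv -> 5
  uint64_t RuntimeResidual = CopyLen % LoopOpSize;         // CreateURem -> 3
  uint64_t RuntimeBytesCopied = CopyLen - RuntimeResidual; // CreateSub -> 20
  // The residual loop then copies bytes [20, 23) one element at a time.
  assert(RuntimeLoopCount == 5 && RuntimeResidual == 3 &&
         RuntimeBytesCopied == 20);
  return 0;
}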
  BasicBlock *LoopBB =
      BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB);
  IRBuilder<> LoopBuilder(LoopBB);
  Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
  Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));

  PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index");
  LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);

  Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
  LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                 PartSrcAlign, SrcIsVolatile);
  if (!CanOverlap) {
    // Set alias scope for loads.
    Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope));
  }
  Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
  StoreInst *Store =
      LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
  if (!CanOverlap) {
    // Indicate that stores don't overlap loads.
    Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
  }
  if (AtomicElementSize) {
    Load->setAtomic(AtomicOrdering::Unordered);
    Store->setAtomic(AtomicOrdering::Unordered);
  }
  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U));
  LoopIndex->addIncoming(NewIndex, LoopBB);
  bool requiresResidual =
      !LoopOpIsInt8 && !(AtomicElementSize && LoopOpSize == AtomicElementSize);
  if (requiresResidual) {
    Type *ResLoopOpType = AtomicElementSize
                              ? Type::getIntNTy(Ctx, *AtomicElementSize * 8)
                              : Int8Type;
    unsigned ResLoopOpSize = DL.getTypeStoreSize(ResLoopOpType);
    assert(ResLoopOpSize == (AtomicElementSize ? *AtomicElementSize : 1) &&
           "Store size is expected to match type size");

    // Split the byte count into a multiple of the loop width plus a residual.
    Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
    Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);
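Both exemptions in requiresResidual can be sanity-checked with a small sketch (the concrete values are hypothetical):

#include <cassert>
#include <cstdint>
#include <optional>

int main() {
  // Case 1: a byte-wide loop type already copies every byte, so no residual.
  bool LoopOpIsInt8 = true;
  std::optional<uint32_t> AtomicElementSize; // plain, non-atomic memcpy
  uint32_t LoopOpSize = 1;
  bool requiresResidual =
      !LoopOpIsInt8 && !(AtomicElementSize && LoopOpSize == AtomicElementSize);
  assert(!requiresResidual);

  // Case 2: an atomic memcpy whose loop width equals the element size; the
  // intrinsic requires the length to be a multiple of the element size, so
  // no residual bytes can exist either.
  LoopOpIsInt8 = false;
  AtomicElementSize = 4;
  LoopOpSize = 4;
  requiresResidual =
      !LoopOpIsInt8 && !(AtomicElementSize && LoopOpSize == AtomicElementSize);
  assert(!requiresResidual);
  return 0;
}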
    BasicBlock *ResHeaderBB = BasicBlock::Create(
        Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr);
    BasicBlock *ResLoopBB = BasicBlock::Create(
        Ctx, "loop-memcpy-residual", PreLoopBB->getParent(), nullptr);

    // Branch to the main loop if its trip count is non-zero; the residual
    // header then decides whether any residual bytes remain to be copied.
    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
                           LoopBB, ResHeaderBB);
    // ...
    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
        ResHeaderBB);

    // Determine if we need to branch to the residual loop or bypass it.
    IRBuilder<> RHBuilder(ResHeaderBB);
    RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero),
                           ResLoopBB, PostLoopBB);

    // Copy the residual with an element-wide load/store loop.
    IRBuilder<> ResBuilder(ResLoopBB);
    PHINode *ResidualIndex =
        ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index");
    ResidualIndex->addIncoming(Zero, ResHeaderBB);
    Value *SrcAsResLoopOpType = ResBuilder.CreateBitCast(
        SrcAddr, PointerType::get(ResLoopOpType, SrcAS));
    Value *DstAsResLoopOpType = ResBuilder.CreateBitCast(
        DstAddr, PointerType::get(ResLoopOpType, DstAS));
    Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
    Value *SrcGEP = ResBuilder.CreateInBoundsGEP(
        ResLoopOpType, SrcAsResLoopOpType, FullOffset);
    LoadInst *Load = ResBuilder.CreateAlignedLoad(ResLoopOpType, SrcGEP,
                                                  PartSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    Value *DstGEP = ResBuilder.CreateInBoundsGEP(
        ResLoopOpType, DstAsResLoopOpType, FullOffset);
    StoreInst *Store = ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
                                                     DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    Value *ResNewIndex = ResBuilder.CreateAdd(
        ResidualIndex, ConstantInt::get(CopyLenType, ResLoopOpSize));
    ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);

    // Create the loop branch condition.
    ResBuilder.CreateCondBr(
        ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB,
        PostLoopBB);
  } else {
    // No residual loop is needed: branch straight between the main copy loop
    // and the post-loop block.
    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
                           LoopBB, PostLoopBB);
    // ...
    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
        PostLoopBB);
  }
}
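Taken together, the unknown-size expansion behaves like this host-side model; a sketch only, since the real code emits the blocks loop-memcpy-expansion, loop-memcpy-residual-header, and loop-memcpy-residual rather than executing anything (the 4-byte width is again an assumed target choice):

#include <cstddef>
#include <cstdint>
#include <cstring>

void memcpyExpansionModel(uint8_t *Dst, const uint8_t *Src, size_t CopyLen) {
  size_t RuntimeLoopCount = CopyLen / 4;
  size_t RuntimeResidual = CopyLen % 4;
  size_t RuntimeBytesCopied = CopyLen - RuntimeResidual;
  if (RuntimeLoopCount != 0)                       // pre-loop CondBr
    for (size_t I = 0; I != RuntimeLoopCount; ++I) // loop-memcpy-expansion
      std::memcpy(Dst + 4 * I, Src + 4 * I, 4);    // one i32 load/store pair
  if (RuntimeResidual != 0)                        // loop-memcpy-residual-header
    for (size_t I = 0; I != RuntimeResidual; ++I)  // loop-memcpy-residual
      Dst[RuntimeBytesCopied + I] = Src[RuntimeBytesCopied + I];
}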
static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
                              Value *DstAddr, Value *CopyLen, Align SrcAlign,
                              Align DstAlign, bool SrcIsVolatile,
                              bool DstIsVolatile) {
  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();
  Type *EltTy = Type::getInt8Ty(F->getContext());

  // Create a comparison of src and dst, based on which we jump to either
  // the forward-copy part of the function (if src >= dst) or the backwards
  // copy part (if src < dst).
  ICmpInst *PtrCompare = new ICmpInst(InsertBefore, ICmpInst::ICMP_ULT,
                                      SrcAddr, DstAddr, "compare_src_dst");
  Instruction *ThenTerm, *ElseTerm;
  SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore, &ThenTerm, &ElseTerm);

  BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
  CopyBackwardsBB->setName("copy_backwards");
  BasicBlock *CopyForwardBB = ElseTerm->getParent();
  CopyForwardBB->setName("copy_forward");
  BasicBlock *ExitBB = InsertBefore->getParent();
  ExitBB->setName("memmove_done");

  unsigned PartSize = DL.getTypeStoreSize(EltTy);
  Align PartSrcAlign(commonAlignment(SrcAlign, PartSize));
  Align PartDstAlign(commonAlignment(DstAlign, PartSize));
  // ...
  // Copying backwards: read the element at index_ptr = phi - 1.
  Value *Element = LoopBuilder.CreateAlignedLoad(
      EltTy, LoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, IndexPtr),
      PartSrcAlign, "element");
  // ...
  // Copying forward.
  PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr");
static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
                             Value *CopyLen, Value *SetValue, Align DstAlign,
                             bool IsVolatile) {
  // ...
  // Cast the destination pointer to the type of the value being stored.
  unsigned dstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
  DstAddr = Builder.CreateBitCast(
      DstAddr, PointerType::get(SetValue->getType(), dstAS));
  // ...
  unsigned PartSize = DL.getTypeStoreSize(SetValue->getType());
  // ...
  LoopBuilder.CreateAlignedStore(
      SetValue,
      LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex),
      PartAlign, IsVolatile);
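A hedged usage sketch for the memset path; lowerMemSet is a hypothetical caller, while expandMemSetAsLoop and eraseFromParent are the real APIs (note the expansion does not delete the intrinsic):

#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"

// Hypothetical caller: expand one llvm.memset into the loop above, then
// remove the now-redundant intrinsic call.
void lowerMemSet(llvm::MemSetInst *MemSet) {
  llvm::expandMemSetAsLoop(MemSet);
  MemSet->eraseFromParent();
}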
template <typename T>
static bool canOverlap(MemTransferBase<T> *Memcpy, ScalarEvolution *SE) {
  if (SE) {
    auto *SrcSCEV = SE->getSCEV(Memcpy->getRawSource());
    auto *DestSCEV = SE->getSCEV(Memcpy->getRawDest());
    if (SE->isKnownPredicateAt(CmpInst::ICMP_NE, SrcSCEV, DestSCEV, Memcpy))
      return false;
  }
  return true;
}
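canOverlap is what gates the alias-scope metadata seen in the copy loops: CanOverlap is passed as false only when ScalarEvolution proves the source and destination differ. A hedged caller sketch (runOnMemCpy is hypothetical; expandMemCpyAsLoop is the file's real entry point):

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"

// Hypothetical helper: with SE available, the expansion can prove
// Src != Dst and attach !alias.scope/!noalias to the emitted load/store.
static void runOnMemCpy(llvm::MemCpyInst *Memcpy,
                        const llvm::TargetTransformInfo &TTI,
                        llvm::ScalarEvolution *SE) {
  llvm::expandMemCpyAsLoop(Memcpy, TTI, SE);
  Memcpy->eraseFromParent(); // the expansion does not delete the intrinsic
}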
Public entry points declared in llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h:

/// Emit a loop implementing the semantics of an llvm.memcpy whose size is a
/// compile-time constant.
void createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
                               Value *DstAddr, ConstantInt *CopyLen,
                               Align SrcAlign, Align DestAlign,
                               bool SrcIsVolatile, bool DstIsVolatile,
                               bool CanOverlap, const TargetTransformInfo &TTI,
                               std::optional<uint32_t> AtomicCpySize = std::nullopt);
/// Expand MemMove as a loop. MemMove is not deleted.
void expandMemMoveAsLoop(MemMoveInst *MemMove);

/// Emit a loop implementing the semantics of llvm.memcpy where the size is
/// not a compile-time constant.
void createMemCpyLoopUnknownSize(Instruction *InsertBefore, Value *SrcAddr,
                                 Value *DstAddr, Value *CopyLen, Align SrcAlign,
                                 Align DestAlign, bool SrcIsVolatile,
                                 bool DstIsVolatile, bool CanOverlap,
                                 const TargetTransformInfo &TTI,
                                 std::optional<unsigned> AtomicSize = std::nullopt);
/// Expand MemCpy as a loop. MemCpy is not deleted.
void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI,
                        ScalarEvolution *SE = nullptr);

/// Expand AtomicMemCpy as a loop. AtomicMemCpy is not deleted.
void expandAtomicMemCpyAsLoop(AtomicMemCpyInst *AtomicMemCpy,
                              const TargetTransformInfo &TTI,
                              ScalarEvolution *SE);

/// Expand MemSet as a loop. MemSet is not deleted.
void expandMemSetAsLoop(MemSetInst *MemSet);
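A hedged end-to-end sketch of driving this API from a pass; the collection loop and function wiring are illustrative assumptions, while the expand* calls are the entry points listed above:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"

using namespace llvm;

// Hypothetical driver: expand every mem intrinsic in F into explicit loops.
static bool lowerMemIntrinsics(Function &F, const TargetTransformInfo &TTI,
                               ScalarEvolution &SE) {
  // Collect first: the expansion splits basic blocks, which would otherwise
  // invalidate a plain instruction walk.
  SmallVector<IntrinsicInst *, 8> MemCalls;
  for (Instruction &I : instructions(F))
    if (isa<MemCpyInst>(I) || isa<MemMoveInst>(I) || isa<MemSetInst>(I))
      MemCalls.push_back(cast<IntrinsicInst>(&I));

  for (IntrinsicInst *II : MemCalls) {
    if (auto *Memcpy = dyn_cast<MemCpyInst>(II))
      expandMemCpyAsLoop(Memcpy, TTI, &SE);
    else if (auto *Memmove = dyn_cast<MemMoveInst>(II))
      expandMemMoveAsLoop(Memmove);
    else if (auto *Memset = dyn_cast<MemSetInst>(II))
      expandMemSetAsLoop(Memset);
    II->eraseFromParent(); // none of the expand* helpers delete the intrinsic
  }
  return !MemCalls.empty();
}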