#define DEBUG_TYPE "lower-mem-intrinsics"
void llvm::createMemCpyLoopKnownSize(
    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr,
    ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile,
    bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI,
    std::optional<uint32_t> AtomicElementSize) {
  // ... (pre-/post-loop block setup, DataLayout lookup (DL), and creation of
  // the anonymous alias-scope metadata (Ctx, NewScope) elided) ...

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
      AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");

  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

  uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize;

  if (LoopEndCount != 0) {
    // ... (loop basic-block creation and pre-loop builder PLBuilder elided) ...

    // Cast the Src and Dst pointers to pointers to the loop operand type (if
    // needed).
    PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
    PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
    if (SrcAddr->getType() != SrcOpType) {
      SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
    }
    if (DstAddr->getType() != DstOpType) {
      DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
    }

    // ... (loop-index PHI and source GEP elided) ...
    LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                   PartSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    // ... (destination GEP elided) ...
    StoreInst *Store = LoopBuilder.CreateAlignedStore(
        Load, DstGEP, PartDstAlign, DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    // ... (loop-index increment and back-edge branch elided) ...
  }
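  // Worked example (added for exposition, not in the original source): with
  // CopyLen == 15 and LoopOpType == i32 (LoopOpSize == 4), the loop above runs
  // LoopEndCount == 15 / 4 == 3 times and copies 12 bytes; the residual code
  // below then covers the remaining 15 - 12 == 3 bytes with smaller operand
  // types (e.g. one i16 plus one i8, as chosen by
  // getMemcpyLoopResidualLoweringType).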
  uint64_t BytesCopied = LoopEndCount * LoopOpSize;
  uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied;
  if (RemainingBytes) {
    // ... (builder for the residual copies elided) ...
    SmallVector<Type *, 5> RemainingOps;
    TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
                                          SrcAS, DstAS, SrcAlign.value(),
                                          DstAlign.value(), AtomicElementSize);

    for (auto *OpTy : RemainingOps) {
      // Calculate the new index.
      unsigned OperandSize = DL.getTypeStoreSize(OpTy);
      assert(
          (!AtomicElementSize || OperandSize % *AtomicElementSize == 0) &&
          "Atomic memcpy lowering is not supported for selected operand size");

      uint64_t GepIndex = BytesCopied / OperandSize;
      assert(GepIndex * OperandSize == BytesCopied &&
             "Division should have no Remainder!");

      // Cast source to operand type and load.
      PointerType *SrcPtrType = PointerType::get(OpTy, SrcAS);
      // ... (source GEP and load elided) ...
      if (!CanOverlap) {
        // Set alias scope for loads.
        Load->setMetadata(LLVMContext::MD_alias_scope,
                          MDNode::get(Ctx, NewScope));
      }

      // Cast destination to operand type and store.
      PointerType *DstPtrType = PointerType::get(OpTy, DstAS);
      // ... (destination GEP and store elided) ...
      if (!CanOverlap) {
        // Indicate that stores don't overlap loads.
        Store->setMetadata(LLVMContext::MD_noalias,
                           MDNode::get(Ctx, NewScope));
      }
      if (AtomicElementSize) {
        Load->setAtomic(AtomicOrdering::Unordered);
        Store->setAtomic(AtomicOrdering::Unordered);
      }
      BytesCopied += OperandSize;
    }
  }
  assert(BytesCopied == CopyLen->getZExtValue() &&
         "Bytes copied should match size in the call!");
}
void llvm::createMemCpyLoopUnknownSize(
    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
    Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile,
    bool CanOverlap, const TargetTransformInfo &TTI,
    std::optional<uint32_t> AtomicElementSize) {
  BasicBlock *PreLoopBB = InsertBefore->getParent();
  BasicBlock *PostLoopBB =
      PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion");
  // ... (function/context/DataLayout lookups (Ctx, DL) and MDBuilder
  // construction (MDB) elided) ...
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
      AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");
  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

  IRBuilder<> PLBuilder(PreLoopBB->getTerminator());

  PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
  PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
  if (SrcAddr->getType() != SrcOpType) {
    SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
  }
  if (DstAddr->getType() != DstOpType) {
    DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
  }

  // Calculate the loop trip count and the remaining bytes to copy after it.
  Type *CopyLenType = CopyLen->getType();
  IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType);
  assert(ILengthType &&
         "expected size argument to memcpy to be an integer type!");
  Type *Int8Type = Type::getInt8Ty(Ctx);
  bool LoopOpIsInt8 = LoopOpType == Int8Type;
  ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
  Value *RuntimeLoopCount =
      LoopOpIsInt8 ? CopyLen : PLBuilder.CreateUDiv(CopyLen, CILoopOpSize);

  // ... (creation of the "loop-memcpy-expansion" block LoopBB, its builder
  // LoopBuilder, and the per-iteration alignments PartSrcAlign/PartDstAlign
  // elided) ...
  PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index");
  LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);

  Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
  LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                 PartSrcAlign, SrcIsVolatile);
  if (!CanOverlap) {
    // Set alias scope for loads.
    Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope));
  }
  Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
  StoreInst *Store = LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
                                                    DstIsVolatile);
  if (!CanOverlap) {
    // Indicate that stores don't overlap loads.
    Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
  }
  if (AtomicElementSize) {
    Load->setAtomic(AtomicOrdering::Unordered);
    Store->setAtomic(AtomicOrdering::Unordered);
  }
  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U));
  LoopIndex->addIncoming(NewIndex, LoopBB);
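  // Note (added for exposition): because CopyLen is only known at run time,
  // both the trip count and the leftover byte count must be computed with IR
  // arithmetic: RuntimeLoopCount = CopyLen udiv LoopOpSize above, and
  // RuntimeResidual = CopyLen urem LoopOpSize below. For CopyLen == 15 and a
  // 4-byte loop operand that yields 3 main-loop iterations plus 3 residual
  // bytes.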
  bool requiresResidual =
      !LoopOpIsInt8 && !(AtomicElementSize && LoopOpSize == AtomicElementSize);
  if (requiresResidual) {
    Type *ResLoopOpType = AtomicElementSize
                              ? Type::getIntNTy(Ctx, *AtomicElementSize * 8)
                              : Int8Type;
    unsigned ResLoopOpSize = DL.getTypeStoreSize(ResLoopOpType);
    assert(ResLoopOpSize == (AtomicElementSize ? *AtomicElementSize : 1) &&
           "Store size is expected to match type size");

    Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
    Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);

    // Loop body for the residual copy.
    BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual",
                                               PreLoopBB->getParent(), nullptr);
    // Residual loop header.
    BasicBlock *ResHeaderBB = BasicBlock::Create(
        Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr);

    // Branch to the main loop if the count is non-zero, to the residual
    // header if the copy is smaller than one main-loop iteration but
    // non-zero, and past the residual loop if the size is zero.
    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
                           LoopBB, ResHeaderBB);
    PreLoopBB->getTerminator()->eraseFromParent();

    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
        ResHeaderBB);

    // Determine if we need to branch to the residual loop or bypass it.
    IRBuilder<> RHBuilder(ResHeaderBB);
    RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero),
                           ResLoopBB, PostLoopBB);

    // Copy the residual with a single-element load/store loop.
    IRBuilder<> ResBuilder(ResLoopBB);
    PHINode *ResidualIndex =
        ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index");
    ResidualIndex->addIncoming(Zero, ResHeaderBB);

    Value *SrcAsResLoopOpType = ResBuilder.CreateBitCast(
        SrcAddr, PointerType::get(ResLoopOpType, SrcAS));
    Value *DstAsResLoopOpType = ResBuilder.CreateBitCast(
        DstAddr, PointerType::get(ResLoopOpType, DstAS));
    Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
    Value *SrcGEP = ResBuilder.CreateInBoundsGEP(
        ResLoopOpType, SrcAsResLoopOpType, FullOffset);
    LoadInst *Load = ResBuilder.CreateAlignedLoad(ResLoopOpType, SrcGEP,
                                                  PartSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    Value *DstGEP = ResBuilder.CreateInBoundsGEP(
        ResLoopOpType, DstAsResLoopOpType, FullOffset);
    StoreInst *Store = ResBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
                                                     DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    Value *ResNewIndex = ResBuilder.CreateAdd(
        ResidualIndex, ConstantInt::get(CopyLenType, ResLoopOpSize));
    ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);

    // Create the loop branch condition.
    ResBuilder.CreateCondBr(
        ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB,
        PostLoopBB);
  } else {
    // The loop operand is a single byte (or exactly the atomic element size),
    // so no residual loop is needed; just wire up the pre-loop and loop
    // branches.
    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
                           LoopBB, PostLoopBB);
    PreLoopBB->getTerminator()->eraseFromParent();
    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
        PostLoopBB);
  }
}
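// Control flow emitted above (added for exposition): the pre-loop block
// branches to "loop-memcpy-expansion" when RuntimeLoopCount != 0 and otherwise
// to "loop-memcpy-residual-header"; the main loop exits to the residual
// header, which enters "loop-memcpy-residual" only when RuntimeResidual != 0;
// all paths rejoin at "post-loop-memcpy-expansion".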
static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
                              Value *DstAddr, Value *CopyLen, Align SrcAlign,
                              Align DstAlign, bool SrcIsVolatile,
                              bool DstIsVolatile,
                              const TargetTransformInfo &TTI) {
  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();
  // TODO: Use different element type if possible?
  Type *EltTy = Type::getInt8Ty(F->getContext());

  // Create a comparison of src and dst, based on which we jump to either the
  // forward-copy part of the function (if src >= dst) or the backwards-copy
  // part (if src < dst).
  IRBuilder<> IRB(InsertBefore);
  Value *PtrCompare = IRB.CreateICmpULT(SrcAddr, DstAddr, "compare_src_dst");
  Instruction *ThenTerm, *ElseTerm;
  SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore->getIterator(),
                                &ThenTerm, &ElseTerm);

  BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
  CopyBackwardsBB->setName("copy_backwards");
  BasicBlock *CopyForwardBB = ElseTerm->getParent();
  CopyForwardBB->setName("copy_forward");
  BasicBlock *ExitBB = InsertBefore->getParent();
  ExitBB->setName("memmove_done");

  unsigned PartSize = DL.getTypeStoreSize(EltTy);
  Align PartSrcAlign(commonAlignment(SrcAlign, PartSize));
  Align PartDstAlign(commonAlignment(DstAlign, PartSize));

  // ... (backwards-copy loop elided except for its element load:) ...
  Value *Element = LoopBuilder.CreateAlignedLoad(
      EltTy, LoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, IndexPtr),
      PartSrcAlign, "element");
  // ...

  // Copying forward.
  PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr");
  // ... (forward loop body and branch wiring elided) ...
}
static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
                             Value *CopyLen, Value *SetValue, Align DstAlign,
                             bool IsVolatile) {
  // ... (block split and pre-loop builder elided) ...

  // Cast the pointer to the type of the value getting stored.
  unsigned dstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
  DstAddr = Builder.CreateBitCast(
      DstAddr, PointerType::get(SetValue->getType(), dstAS));

  // ... (zero-length bypass branch elided) ...
  unsigned PartSize = DL.getTypeStoreSize(SetValue->getType());
  Align PartAlign(commonAlignment(DstAlign, PartSize));

  // ... (loop-index PHI elided; the loop stores SetValue each iteration:) ...
  LoopBuilder.CreateAlignedStore(
      SetValue,
      LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex),
      PartAlign, IsVolatile);
  // ... (index increment and loop branch elided) ...
}
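// Example (a hedged sketch added for illustration; "expandAllMemSets" is a
// hypothetical helper, not part of this file, and assumes
// llvm/IR/InstIterator.h for instructions(F)): expanding every memset in a
// function via the public wrapper expandMemSetAsLoop. Per its documentation,
// the wrapper does not delete the intrinsic, so the caller erases it.
static void expandAllMemSets(Function &F) {
  SmallVector<MemSetInst *, 8> Worklist;
  for (Instruction &I : instructions(F))
    if (auto *MS = dyn_cast<MemSetInst>(&I))
      Worklist.push_back(MS);
  for (MemSetInst *MS : Worklist) {
    expandMemSetAsLoop(MS); // emits the store loop before MS
    MS->eraseFromParent();  // the expansion does not delete MS itself
  }
}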
template <typename T>
static bool canOverlap(MemTransferBase<T> *Memcpy, ScalarEvolution *SE) {
  if (SE) {
    auto *SrcSCEV = SE->getSCEV(Memcpy->getRawSource());
    auto *DestSCEV = SE->getSCEV(Memcpy->getRawDest());
    if (SE->isKnownPredicateAt(CmpInst::ICMP_NE, SrcSCEV, DestSCEV, Memcpy))
      return false;
  }
  return true;
}
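// Note (added for exposition): when ScalarEvolution can prove at the call
// site that the source and destination pointers are distinct, canOverlap
// returns false, and the memcpy lowerings above then tag their loads and
// stores with the alias-scope/noalias metadata so later passes know the
// loop's loads and stores do not alias each other.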
// ... (expandMemCpyAsLoop, which dispatches to the known- and unknown-size
// loop builders above using canOverlap, elided) ...

bool llvm::expandMemMoveAsLoop(MemMoveInst *Memmove,
                               const TargetTransformInfo &TTI) {
  Value *CopyLen = Memmove->getLength();
  Value *SrcAddr = Memmove->getRawSource();
  Value *DstAddr = Memmove->getRawDest();
  Align SrcAlign = Memmove->getSourceAlign().valueOrOne();
  Align DstAlign = Memmove->getDestAlign().valueOrOne();
  bool SrcIsVolatile = Memmove->isVolatile();
  bool DstIsVolatile = SrcIsVolatile;

  unsigned SrcAS = SrcAddr->getType()->getPointerAddressSpace();
  unsigned DstAS = DstAddr->getType()->getPointerAddressSpace();
  if (SrcAS != DstAS) {
    if (!TTI.addrspacesMayAlias(SrcAS, DstAS)) {
      // We may not be able to emit a pointer comparison, but we don't have
      // to: operands in distinct, non-aliasing address spaces cannot
      // overlap, so expand as a plain memcpy.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(CopyLen)) {
        createMemCpyLoopKnownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr,
                                  CI, SrcAlign, DstAlign, SrcIsVolatile,
                                  DstIsVolatile, /*CanOverlap=*/false, TTI);
      } else {
        createMemCpyLoopUnknownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr,
                                    CopyLen, SrcAlign, DstAlign, SrcIsVolatile,
                                    DstIsVolatile, /*CanOverlap=*/false, TTI);
      }
      return true;
    }
    // ... (otherwise try to addrspacecast one operand to the other's address
    // space; if neither direction is a valid cast, give up:) ...
    LLVM_DEBUG(dbgs() << "Do not know how to expand memmove between different "
                         "address spaces\n");
    return false;
  }

  createMemMoveLoop(
      /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CopyLen, SrcAlign, DstAlign,
      SrcIsVolatile, DstIsVolatile, TTI);
  return true;
}
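// Example (a hedged sketch added for illustration; "lowerMemMove" is a
// hypothetical helper, not part of this file): a target pass might drive the
// expansion like this, deleting the intrinsic only when expansion succeeded.
static bool lowerMemMove(MemMoveInst *MM, const TargetTransformInfo &TTI) {
  if (!expandMemMoveAsLoop(MM, TTI))
    return false; // e.g. an unsupported address-space combination
  MM->eraseFromParent();
  return true;
}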