LLVM  15.0.0git
NVPTXLowerAggrCopies.cpp
Go to the documentation of this file.
1 //===- NVPTXLowerAggrCopies.cpp - ------------------------------*- C++ -*--===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // \file
10 // Lower aggregate copies, memset, memcpy, memmove intrinsics into loops when
11 // the size is large or is not a compile-time constant.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "NVPTXLowerAggrCopies.h"
18 #include "llvm/IR/Constants.h"
19 #include "llvm/IR/DataLayout.h"
20 #include "llvm/IR/Function.h"
21 #include "llvm/IR/IRBuilder.h"
22 #include "llvm/IR/Instructions.h"
23 #include "llvm/IR/IntrinsicInst.h"
24 #include "llvm/IR/Intrinsics.h"
25 #include "llvm/IR/LLVMContext.h"
26 #include "llvm/IR/Module.h"
27 #include "llvm/Support/Debug.h"
30 
31 #define DEBUG_TYPE "nvptx"
32 
33 using namespace llvm;
34 
35 namespace {
36 
37 // The actual pass class: a FunctionPass that rewrites qualifying aggregate
// load/store pairs and mem* intrinsic calls into explicit loops.
38 struct NVPTXLowerAggrCopies : public FunctionPass {
// Pass identification member; it is handed to the FunctionPass constructor below.
39  static char ID;
40 
41  NVPTXLowerAggrCopies() : FunctionPass(ID) {}
42 
// NOTE(review): the body appears empty in this listing (listing lines 44-45
// are absent). runOnFunction calls
// getAnalysis<TargetTransformInfoWrapperPass>(), so the matching analysis
// requirement is presumably declared here -- confirm against the upstream file.
43  void getAnalysisUsage(AnalysisUsage &AU) const override {
46  }
47 
// Collects the candidate aggregate loads and mem* intrinsic calls in F and
// expands each of them into a loop. Returns false when no candidate was found
// and true otherwise.
48  bool runOnFunction(Function &F) override;
49 
// Size threshold used by runOnFunction (presumably in bytes -- confirm):
// aggregate loads whose type store size is below it are left alone, and
// constant-length mem* intrinsics are expanded only when their length is at
// least this value. Intrinsics with a non-constant length are always expanded.
50  static const unsigned MaxAggrCopySize = 128;
51 
// Human-readable name of this pass.
52  StringRef getPassName() const override {
53  return "Lower aggregate copies/intrinsics into loops";
54  }
55 };
56 
58 
62 
63  const DataLayout &DL = F.getParent()->getDataLayout();
64  LLVMContext &Context = F.getParent()->getContext();
65  const TargetTransformInfo &TTI =
66  getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
67 
68  // Collect all aggregate loads and mem* calls.
69  for (BasicBlock &BB : F) {
70  for (Instruction &I : BB) {
71  if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
72  if (!LI->hasOneUse())
73  continue;
74 
75  if (DL.getTypeStoreSize(LI->getType()) < MaxAggrCopySize)
76  continue;
77 
78  if (StoreInst *SI = dyn_cast<StoreInst>(LI->user_back())) {
79  if (SI->getOperand(0) != LI)
80  continue;
81  AggrLoads.push_back(LI);
82  }
83  } else if (MemIntrinsic *IntrCall = dyn_cast<MemIntrinsic>(&I)) {
84 // Convert intrinsic calls with variable size or with constant size
85 // of at least the MaxAggrCopySize threshold.
86  if (ConstantInt *LenCI = dyn_cast<ConstantInt>(IntrCall->getLength())) {
87  if (LenCI->getZExtValue() >= MaxAggrCopySize) {
88  MemCalls.push_back(IntrCall);
89  }
90  } else {
91  MemCalls.push_back(IntrCall);
92  }
93  }
94  }
95  }
96 
97  if (AggrLoads.size() == 0 && MemCalls.size() == 0) {
98  return false;
99  }
100 
101  //
102  // Do the transformation of an aggr load/copy/set to a loop
103  //
104  for (LoadInst *LI : AggrLoads) {
105  auto *SI = cast<StoreInst>(*LI->user_begin());
106  Value *SrcAddr = LI->getOperand(0);
107  Value *DstAddr = SI->getOperand(1);
108  unsigned NumLoads = DL.getTypeStoreSize(LI->getType());
109  ConstantInt *CopyLen =
111 
112  createMemCpyLoopKnownSize(/* ConvertedInst */ SI,
113  /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr,
114  /* CopyLen */ CopyLen,
115  /* SrcAlign */ LI->getAlign(),
116  /* DestAlign */ SI->getAlign(),
117  /* SrcIsVolatile */ LI->isVolatile(),
118  /* DstIsVolatile */ SI->isVolatile(),
119  /* CanOverlap */ true, TTI);
120 
121  SI->eraseFromParent();
122  LI->eraseFromParent();
123  }
124 
125  // Transform mem* intrinsic calls.
126  for (MemIntrinsic *MemCall : MemCalls) {
127  if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) {
128  expandMemCpyAsLoop(Memcpy, TTI);
129  } else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) {
130  expandMemMoveAsLoop(Memmove);
131  } else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) {
132  expandMemSetAsLoop(Memset);
133  }
134  MemCall->eraseFromParent();
135  }
136 
137  return true;
138 }
139 
140 } // namespace
141 
142 namespace llvm {
144 }
145 
// Registers NVPTXLowerAggrCopies with the pass infrastructure. The first string
// is the pass argument name and the second is its description.
// NOTE(review): the two trailing `false` arguments are presumably the
// "CFG-only" and "is analysis" flags -- confirm against the INITIALIZE_PASS
// macro definition.
146 INITIALIZE_PASS(NVPTXLowerAggrCopies, "nvptx-lower-aggr-copies",
147  "Lower aggregate copies, and llvm.mem* intrinsics into loops",
148  false, false)
149 
151  return new NVPTXLowerAggrCopies();
152 }
StackProtector.h
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::DataLayout
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:113
IntrinsicInst.h
llvm::Function
Definition: Function.h:60
llvm::MemMoveInst
This class wraps the llvm.memmove intrinsic.
Definition: IntrinsicInst.h:1054
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
Module.h
llvm::MemIntrinsic
This is the common base class for memset/memcpy/memmove.
Definition: IntrinsicInst.h:957
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
Constants.h
llvm::initializeNVPTXLowerAggrCopiesPass
void initializeNVPTXLowerAggrCopiesPass(PassRegistry &)
Intrinsics.h
llvm::StackProtector
Definition: StackProtector.h:36
llvm::createLowerAggrCopies
FunctionPass * createLowerAggrCopies()
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
INITIALIZE_PASS
INITIALIZE_PASS(NVPTXLowerAggrCopies, "nvptx-lower-aggr-copies", "Lower aggregate copies, and llvm.mem* intrinsics into loops", false, false) FunctionPass *llvm
Definition: NVPTXLowerAggrCopies.cpp:146
llvm::Instruction
Definition: Instruction.h:42
llvm::PassRegistry
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:928
llvm::CallingConv::ID
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
llvm::MemSetInst
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
Definition: IntrinsicInst.h:989
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:297
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2535
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
I
#define I(x, y, z)
Definition: MD5.cpp:58
IRBuilder.h
NVPTXLowerAggrCopies.h
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::expandMemSetAsLoop
void expandMemSetAsLoop(MemSetInst *MemSet)
Expand MemSet as a loop. MemSet is not deleted.
Definition: LowerMemIntrinsics.cpp:569
DataLayout.h
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:173
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::expandMemMoveAsLoop
void expandMemMoveAsLoop(MemMoveInst *MemMove)
Expand MemMove as a loop. MemMove is not deleted.
Definition: LowerMemIntrinsics.cpp:558
Function.h
Instructions.h
llvm::MemCpyInst
This class wraps the llvm.memcpy intrinsic.
Definition: IntrinsicInst.h:1041
TargetTransformInfo.h
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::createMemCpyLoopKnownSize
void createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, ConstantInt *CopyLen, Align SrcAlign, Align DestAlign, bool SrcIsVolatile, bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI, Optional< uint32_t > AtomicCpySize=None)
Emit a loop implementing the semantics of an llvm.memcpy whose size is a compile time constant.
Definition: LowerMemIntrinsics.cpp:19
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
LLVMContext.h
llvm::expandMemCpyAsLoop
void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI, ScalarEvolution *SE=nullptr)
Expand MemCpy as a loop. MemCpy is not deleted.
Definition: LowerMemIntrinsics.cpp:527
BasicBlockUtils.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
LowerMemIntrinsics.h
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38