37#define INSTR_PROF_VALUE_PROF_MEMOP_API
52#define DEBUG_TYPE "pgo-memop-opt"
54STATISTIC(NumOfPGOMemOPOpt,
"Number of memop intrinsics optimized.");
55STATISTIC(NumOfPGOMemOPAnnotate,
"Number of memop intrinsics annotated.");
62 cl::desc(
"The minimum count to optimize memory "
74 cl::desc(
"The percentage threshold for the "
75 "memory intrinsic calls optimization"));
80 cl::desc(
"The max version for the optimized memory "
86 cl::desc(
"Scale the memop size counts using the basic "
87 " block count value"));
92 cl::desc(
"Size-specialize memcmp and bcmp calls"));
96 cl::desc(
"Optimize the memop size <= this value"));
103 switch (
MI->getIntrinsicID()) {
104 case Intrinsic::memcpy:
106 case Intrinsic::memmove:
108 case Intrinsic::memset:
118 MemOp(MemIntrinsic *
MI) : I(
MI) {}
119 MemOp(CallInst *CI) : I(CI) {}
123 if (
auto MI = asMI())
128 if (
auto MI = asMI())
129 return MI->getLength();
130 return asCI()->getArgOperand(2);
133 if (
auto MI = asMI())
135 asCI()->setArgOperand(2,
Length);
137 StringRef getFuncName() {
138 if (
auto MI = asMI())
139 return MI->getCalledFunction()->getName();
140 return asCI()->getCalledFunction()->getName();
143 if (
auto MI = asMI())
144 if (
MI->getIntrinsicID() == Intrinsic::memmove)
148 bool isMemcmp(TargetLibraryInfo &TLI) {
150 if (asMI() ==
nullptr && TLI.
getLibFunc(*asCI(), Func) &&
151 Func == LibFunc_memcmp) {
156 bool isBcmp(TargetLibraryInfo &TLI) {
158 if (asMI() ==
nullptr && TLI.
getLibFunc(*asCI(), Func) &&
159 Func == LibFunc_bcmp) {
164 const char *
getName(TargetLibraryInfo &TLI) {
165 if (
auto MI = asMI())
166 return getMIName(
MI);
169 if (Func == LibFunc_memcmp)
171 if (Func == LibFunc_bcmp)
179class MemOPSizeOpt :
public InstVisitor<MemOPSizeOpt> {
181 MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI,
182 OptimizationRemarkEmitter &ORE, DominatorTree *DT,
183 TargetLibraryInfo &TLI)
184 : Func(Func), BFI(BFI), ORE(ORE), DT(DT), TLI(TLI), Changed(
false) {}
185 bool isChanged()
const {
return Changed; }
190 for (
auto &MO : WorkList) {
191 ++NumOfPGOMemOPAnnotate;
196 <<
"is Transformed.\n");
201 void visitMemIntrinsic(MemIntrinsic &
MI) {
206 WorkList.push_back(MemOp(&
MI));
209 void visitCallInst(CallInst &CI) {
211 if (TLI.getLibFunc(CI, Func) &&
212 (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
214 WorkList.push_back(MemOp(&CI));
220 BlockFrequencyInfo &BFI;
221 OptimizationRemarkEmitter &ORE;
223 TargetLibraryInfo &TLI;
225 std::vector<MemOp> WorkList;
226 bool perform(MemOp MO);
244 return ScaleCount / Denom;
247bool MemOPSizeOpt::perform(
MemOp MO) {
254 uint32_t MaxNumVals = INSTR_PROF_NUM_BUCKETS;
261 uint64_t ActualCount = TotalCount;
262 uint64_t SavedTotalCount = TotalCount;
267 ActualCount = *BBEdgeCount;
270 LLVM_DEBUG(
dbgs() <<
"Read one memory intrinsic profile with count "
271 << ActualCount <<
"\n");
274 : VDs) {
dbgs() <<
" (" << VD.Value <<
"," << VD.Count <<
")\n"; });
283 TotalCount = ActualCount;
286 <<
" denominator = " << SavedTotalCount <<
"\n");
289 uint64_t RemainCount = TotalCount;
290 uint64_t SavedRemainCount = SavedTotalCount;
291 SmallVector<uint64_t, 16> SizeIds;
292 SmallVector<uint64_t, 16> CaseCounts;
293 SmallDenseSet<uint64_t, 16> SeenSizeId;
294 uint64_t MaxCount = 0;
299 for (
auto I = VDs.begin(),
E = VDs.end();
I !=
E; ++
I) {
301 int64_t
V = VD.Value;
302 uint64_t
C = VD.Count;
304 C = getScaledCount(
C, ActualCount, SavedTotalCount);
318 if (!SeenSizeId.
insert(V).second) {
319 errs() <<
"warning: Invalid Profile Data in Function " <<
Func.getName()
320 <<
": Two identical values in MemOp value counts.\n";
331 assert(SavedRemainCount >= VD.Count);
332 SavedRemainCount -= VD.Count;
343 CaseCounts[0] = RemainCount;
344 if (RemainCount > MaxCount)
345 MaxCount = RemainCount;
347 uint64_t SumForOpt = TotalCount - RemainCount;
350 <<
" Versions (covering " << SumForOpt <<
" out of "
351 << TotalCount <<
")\n");
379 MergeBB->
setName(
"MemOP.Merge");
381 DefaultBB->
setName(
"MemOP.Default");
383 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
384 auto &Ctx =
Func.getContext();
387 Value *SizeVar = MO.getLength();
388 SwitchInst *
SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.
size());
389 Type *MemOpTy = MO.I->getType();
390 PHINode *
PHI =
nullptr;
394 PHI = IRBM.CreatePHI(MemOpTy, SizeIds.
size() + 1,
"MemOP.RVMerge");
395 MO.I->replaceAllUsesWith(
PHI);
396 PHI->addIncoming(MO.I, DefaultBB);
400 MO.I->setMetadata(LLVMContext::MD_prof,
nullptr);
402 if (SavedRemainCount > 0 ||
Version != VDs.size()) {
405 IPVK_MemOPSize, VDs.
size());
410 std::vector<DominatorTree::UpdateType> Updates;
412 Updates.reserve(2 * SizeIds.
size());
414 for (uint64_t SizeId : SizeIds) {
416 Ctx, Twine(
"MemOP.Case.") + Twine(SizeId), &Func, DefaultBB);
417 MemOp NewMO = MO.clone();
420 assert(SizeType &&
"Expected integer type size argument.");
421 ConstantInt *CaseSizeId = ConstantInt::get(SizeType, SizeId);
422 NewMO.setLength(CaseSizeId);
423 NewMO.I->insertInto(CaseBB, CaseBB->
end());
425 IRBCase.CreateBr(MergeBB);
426 SI->addCase(CaseSizeId, CaseBB);
428 PHI->addIncoming(NewMO.I, CaseBB);
430 Updates.push_back({DominatorTree::Insert, CaseBB, MergeBB});
431 Updates.push_back({DominatorTree::Insert, BB, CaseBB});
435 DTU.applyUpdates(Updates);
447 return OptimizationRemark(
DEBUG_TYPE,
"memopt-opt", MO.I)
448 <<
"optimized " <<
NV(
"Memop", MO.getName(TLI)) <<
" with count "
449 <<
NV(
"Count", SumForOpt) <<
" out of " <<
NV(
"Total", TotalCount)
450 <<
" for " <<
NV(
"Versions",
Version) <<
" versions";
465 MemOPSizeOpt MemOPSizeOpt(
F, BFI, ORE, DT, TLI);
466 MemOPSizeOpt.perform();
467 return MemOPSizeOpt.isChanged();
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Function Alias Analysis false
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This header defines various interfaces for pass management in LLVM.
This file provides the interface for IR based instrumentation passes (profile-gen, and profile-use).
static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI, OptimizationRemarkEmitter &ORE, DominatorTree *DT, TargetLibraryInfo &TLI)
FunctionAnalysisManager FAM
static StringRef getName(Value *V)
void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)
static bool isProfitable(const StableFunctionMap::StableFunctionEntries &SFS)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics from passes.
#define STATISTIC(VARNAME, DESC)
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well formed.
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequencies.
LLVM_ABI void setBlockFreq(const BasicBlock *BB, BlockFrequency Freq)
LLVM_ABI std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
LLVM_ABI BlockFrequency getBlockFreq(const BasicBlock *BB) const
getBlockFreq - Return block frequency.
Value * getArgOperand(unsigned i) const
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Base class for instruction visitors.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
This is the common base class for memset/memcpy/memmove.
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &MAM)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
bool isVoidTy() const
Return true if this is 'void'.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
std::pair< iterator, bool > insert(const ValueT &V)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< FuncNode * > Func
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
LLVM_ABI void setProfMetadata(Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
static cl::opt< bool > MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden, cl::desc("Scale the memop size counts using the basic " " block count value"))
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
static cl::opt< bool > DisableMemOPOPT("disable-memop-opt", cl::init(false), cl::Hidden, cl::desc("Disable optimize"))
static cl::opt< unsigned > MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::init(1000), cl::desc("The minimum count to optimize memory " "intrinsic calls"))
static cl::opt< unsigned > MemOpMaxOptSize("memop-value-prof-max-opt-size", cl::Hidden, cl::init(128), cl::desc("Optimize the memop size <= this value"))
FunctionAddr VTableAddr uintptr_t uintptr_t Version
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst w...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
cl::opt< bool > MemOPOptMemcmpBcmp("pgo-memop-optimize-memcmp-bcmp", cl::init(true), cl::Hidden, cl::desc("Size-specialize memcmp and bcmp calls"))
LLVM_ABI SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and returns them if Inst is annotated with value profile dat...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
std::enable_if_t< std::is_unsigned_v< T >, T > SaturatingMultiply(T X, T Y, bool *ResultOverflowed=nullptr)
Multiply two unsigned integers, X and Y, of type T.
static cl::opt< unsigned > MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden, cl::desc("The max version for the optimized memory " " intrinsic calls"))
static cl::opt< unsigned > MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40), cl::Hidden, cl::desc("The percentage threshold for the " "memory intrinsic calls optimization"))
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.