Go to the documentation of this file.
37 #define INSTR_PROF_VALUE_PROF_MEMOP_API
52 #define DEBUG_TYPE "pgo-memop-opt"
54 STATISTIC(NumOfPGOMemOPOpt,
"Number of memop intrinsics optimized.");
55 STATISTIC(NumOfPGOMemOPAnnotate,
"Number of memop intrinsics annotated.");
61 cl::desc(
"The minimum count to optimize memory "
73 cl::desc(
"The percentage threshold for the "
74 "memory intrinsic calls optimization"));
80 cl::desc(
"The max version for the optimized memory "
86 cl::desc(
"Scale the memop size counts using the basic "
87 " block count value"));
92 cl::desc(
"Size-specialize memcmp and bcmp calls"));
96 cl::desc(
"Optimize the memop size <= this value"));
101 switch (
MI->getIntrinsicID()) {
104 case Intrinsic::memmove:
106 case Intrinsic::memset:
119 CallInst *asCI() {
return cast<CallInst>(
I); }
121 if (
auto MI = asMI())
122 return MemOp(cast<MemIntrinsic>(
MI->clone()));
123 return MemOp(cast<CallInst>(asCI()->clone()));
126 if (
auto MI = asMI())
127 return MI->getLength();
128 return asCI()->getArgOperand(2);
130 void setLength(
Value *Length) {
131 if (
auto MI = asMI())
132 return MI->setLength(Length);
133 asCI()->setArgOperand(2, Length);
136 if (
auto MI = asMI())
137 return MI->getCalledFunction()->getName();
138 return asCI()->getCalledFunction()->getName();
141 if (
auto MI = asMI())
142 if (
MI->getIntrinsicID() == Intrinsic::memmove)
148 if (asMI() ==
nullptr && TLI.
getLibFunc(*asCI(), Func) &&
149 Func == LibFunc_memcmp) {
156 if (asMI() ==
nullptr && TLI.
getLibFunc(*asCI(), Func) &&
157 Func == LibFunc_bcmp) {
163 if (
auto MI = asMI())
164 return getMIName(
MI);
167 if (Func == LibFunc_memcmp)
169 if (Func == LibFunc_bcmp)
177 class MemOPSizeOpt :
public InstVisitor<MemOPSizeOpt> {
184 std::make_unique<InstrProfValueData[]>(INSTR_PROF_NUM_BUCKETS);
186 bool isChanged()
const {
return Changed; }
191 for (
auto &MO : WorkList) {
192 ++NumOfPGOMemOPAnnotate;
197 <<
"is Transformed.\n");
203 Value *Length =
MI.getLength();
205 if (isa<ConstantInt>(Length))
207 WorkList.push_back(
MemOp(&
MI));
213 (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
215 WorkList.push_back(
MemOp(&CI));
226 std::vector<MemOp> WorkList;
228 std::unique_ptr<InstrProfValueData[]> ValueDataArray;
229 bool perform(
MemOp MO);
233 assert(Count <= TotalCount);
247 return ScaleCount / Denom;
250 bool MemOPSizeOpt::perform(
MemOp MO) {
257 uint32_t NumVals, MaxNumVals = INSTR_PROF_NUM_BUCKETS;
260 ValueDataArray.get(), NumVals, TotalCount))
264 uint64_t SavedTotalCount = TotalCount;
266 auto BBEdgeCount =
BFI.getBlockProfileCount(MO.I->getParent());
269 ActualCount = *BBEdgeCount;
273 LLVM_DEBUG(
dbgs() <<
"Read one memory intrinsic profile with count "
274 << ActualCount <<
"\n");
277 : VDs) {
dbgs() <<
" (" << VD.Value <<
"," << VD.Count <<
")\n"; });
286 TotalCount = ActualCount;
289 <<
" denominator = " << SavedTotalCount <<
"\n");
293 uint64_t SavedRemainCount = SavedTotalCount;
300 CaseCounts.push_back(0);
302 for (
auto I = VDs.begin(),
E = VDs.end();
I !=
E; ++
I) {
304 int64_t V = VD.Value;
307 C = getScaledCount(
C, ActualCount, SavedTotalCount);
310 RemainingVDs.push_back(VD);
316 if (!isProfitable(
C, RemainCount)) {
317 RemainingVDs.
insert(RemainingVDs.end(),
I,
E);
323 <<
": Two consecutive, identical values in MemOp value"
330 SizeIds.push_back(V);
331 CaseCounts.push_back(
C);
337 assert(SavedRemainCount >= VD.Count);
338 SavedRemainCount -= VD.Count;
341 RemainingVDs.
insert(RemainingVDs.end(),
I + 1,
E);
349 CaseCounts[0] = RemainCount;
350 if (RemainCount > MaxCount)
351 MaxCount = RemainCount;
353 uint64_t SumForOpt = TotalCount - RemainCount;
356 <<
" Versions (covering " << SumForOpt <<
" out of "
357 << TotalCount <<
")\n");
378 auto OrigBBFreq =
BFI.getBlockFreq(
BB);
385 MergeBB->
setName(
"MemOP.Merge");
386 BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency());
387 DefaultBB->
setName(
"MemOP.Default");
390 auto &Ctx =
Func.getContext();
392 BB->getTerminator()->eraseFromParent();
393 Value *SizeVar = MO.getLength();
394 SwitchInst *
SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.size());
395 Type *MemOpTy = MO.I->getType();
400 PHI = IRBM.CreatePHI(MemOpTy, SizeIds.size() + 1,
"MemOP.RVMerge");
401 MO.I->replaceAllUsesWith(PHI);
406 MO.I->setMetadata(LLVMContext::MD_prof,
nullptr);
408 if (SavedRemainCount > 0 ||
Version != NumVals) {
412 IPVK_MemOPSize, NumVals);
417 std::vector<DominatorTree::UpdateType> Updates;
419 Updates.reserve(2 * SizeIds.size());
423 Ctx,
Twine(
"MemOP.Case.") +
Twine(SizeId), &Func, DefaultBB);
424 MemOp NewMO = MO.clone();
426 auto *SizeType = dyn_cast<IntegerType>(NewMO.getLength()->getType());
427 assert(SizeType &&
"Expected integer type size argument.");
429 NewMO.setLength(CaseSizeId);
432 IRBCase.CreateBr(MergeBB);
433 SI->addCase(CaseSizeId, CaseBB);
442 DTU.applyUpdates(Updates);
454 <<
"optimized " <<
NV(
"Memop", MO.getName(TLI)) <<
" with count "
455 <<
NV(
"Count", SumForOpt) <<
" out of " <<
NV(
"Total", TotalCount)
456 <<
" for " <<
NV(
"Versions",
Version) <<
" versions";
469 if (
F.hasFnAttribute(Attribute::OptimizeForSize))
471 MemOPSizeOpt MemOPSizeOpt(
F,
BFI, ORE, DT, TLI);
472 MemOPSizeOpt.perform();
473 return MemOPSizeOpt.isChanged();
std::enable_if_t< std::is_unsigned< T >::value, T > SaturatingMultiply(T X, T Y, bool *ResultOverflowed=nullptr)
Multiply two unsigned integers, X and Y, of type T.
A set of analyses that are preserved following a run of a transformation pass.
static StringRef getName(Value *V)
This is an optimization pass for GlobalISel generic memory operations.
InstListType::iterator iterator
Instruction iterators...
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
static cl::opt< unsigned > MemOpMaxOptSize("memop-value-prof-max-opt-size", cl::Hidden, cl::init(128), cl::desc("Optimize the memop size <= this value"))
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionAnalysisManager FAM
The instances of the Type class are immutable: once they are created, they are never changed.
static constexpr UpdateKind Insert
This is the common base class for memset/memcpy/memmove.
DiagnosticInfoOptimizationBase::Argument NV
static cl::opt< unsigned > MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(1000), cl::desc("The minimum count to optimize memory " "intrinsic calls"))
LLVM Basic Block Representation.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
This is the shared class of boolean and integer constants.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequencies.
static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI, OptimizationRemarkEmitter &ORE, DominatorTree *DT, TargetLibraryInfo &TLI)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static cl::opt< unsigned > MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40), cl::Hidden, cl::ZeroOrMore, cl::desc("The percentage threshold for the " "memory intrinsic calls optimization"))
(vector float) vec_cmpeq(*A, *B) C
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
STATISTIC(NumFunctions, "Total number of functions")
void setName(const Twine &Name)
Change the name of the value.
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Analysis pass which computes BlockFrequencyInfo.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static cl::opt< bool > MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden, cl::desc("Scale the memop size counts using the basic " " block count value"))
initializer< Ty > init(const Ty &Val)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
<%struct.s * > cast struct s *S to sbyte *< sbyte * > sbyte uint cast struct s *agg result to sbyte *< sbyte * > sbyte uint cast struct s *memtmp to sbyte *< sbyte * > sbyte uint ret void llc ends up issuing two memcpy or custom lower memcpy(of small size) to be ldmia/stmia. I think option 2 is better but the current register allocator cannot allocate a chunk of registers at a time. A feasible temporary solution is to use specific physical registers at the lowering time for small(<
StandardInstrumentations SI(Debug, VerifyEach)
static cl::opt< unsigned > MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden, cl::ZeroOrMore, cl::desc("The max version for the optimized memory " " intrinsic calls"))
bool isVoidTy() const
Return true if this is 'void'.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Base class for instruction visitors.
void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIdx from InstrProfR and annotate the instruction Inst with the value profile metadata.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
static cl::opt< bool > DisableMemOPOPT("disable-memop-opt", cl::init(false), cl::Hidden, cl::desc("Disable optimize"))
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Provides information about what library functions are available for the current target.
Analysis pass which computes a DominatorTree.
const InstListType & getInstList() const
Return the underlying instruction list container.
Value * getArgOperand(unsigned i) const
void setProfMetadata(Module *M, Instruction *TI, ArrayRef< uint64_t > EdgeCounts, uint64_t MaxCount)
A container for analyses that lazily runs them and caches their results.
This class represents a function call, abstracting a target machine's calling convention.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
bool getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, InstrProfValueData ValueData[], uint32_t &ActualNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst, which is annotated with value profile metadata.
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
cl::opt< bool > MemOPOptMemcmpBcmp("pgo-memop-optimize-memcmp-bcmp", cl::init(true), cl::Hidden, cl::desc("Size-specialize memcmp and bcmp calls"))
LLVM Value Representation.
Analysis pass providing the TargetLibraryInfo.
iterator insert(iterator I, T &&Elt)