LLVM  13.0.0git
InlineSizeEstimatorAnalysis.cpp
Go to the documentation of this file.
1 //===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This implements feature and label extraction for offline supervised learning
11 // of a IR to native size model.
12 //
13 //===----------------------------------------------------------------------===//
15 
16 #ifdef LLVM_HAVE_TF_API
18 #endif
19 #include "llvm/Analysis/LoopInfo.h"
22 #include "llvm/IR/BasicBlock.h"
23 #include "llvm/IR/Dominators.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/Instructions.h"
26 #include "llvm/IR/PassManager.h"
27 #include "llvm/MC/MCAsmLayout.h"
28 #include "llvm/Support/Casting.h"
31 
32 #include <algorithm>
33 #include <deque>
34 
35 using namespace llvm;
36 
38 
39 #define DEBUG_TYPE "inline-size-estimator"
40 
41 #ifdef LLVM_HAVE_TF_API
42 cl::opt<std::string> TFIR2NativeModelPath(
43  "ml-inliner-ir2native-model", cl::Hidden,
44  cl::desc("Path to saved model evaluating native size from IR."));
45 
46 namespace {
47 unsigned getMaxInstructionID() {
48 #define LAST_OTHER_INST(NR) return NR;
49 #include "llvm/IR/Instruction.def"
50 }
51 
52 class IRToNativeSizeLearning {
53 public:
54  enum class NamedFeatureIndex : size_t {
55  InitialSize,
56  Blocks,
57  Calls,
58  IsLocal,
59  IsLinkOnceODR,
60  IsLinkOnce,
61  Loops,
62  MaxLoopDepth,
63  MaxDomTreeLevel,
64 
65  NumNamedFeatures
66  };
67  static const size_t NumNamedFeatures =
68  static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures);
69  struct FunctionFeatures {
70  static const size_t FeatureCount;
71 
72  std::array<int32_t, NumNamedFeatures> NamedFeatures = {0};
73  std::vector<int32_t> InstructionHistogram;
74  std::vector<int32_t> InstructionPairHistogram;
75 
76  void fillTensor(int32_t *Ptr) const;
77  int32_t &operator[](NamedFeatureIndex Pos) {
78  return NamedFeatures[static_cast<size_t>(Pos)];
79  }
80  };
81  IRToNativeSizeLearning() = default;
82 
83  static FunctionFeatures getFunctionFeatures(Function &F,
85 };
86 
87 // This is a point in time - we determined including these pairs of
88 // consecutive instructions (in the IR layout available at inline time) as
89 // features improves the model performance. We want to move away from manual
90 // feature selection.
91 // The array is given in opcode pairs rather than labels because 1) labels
92 // weren't readily available, and 2) the successions were hand - extracted.
93 //
94 // This array must be sorted.
95 static const std::array<std::pair<size_t, size_t>, 137>
96  ImportantInstructionSuccessions{
97  {{1, 1}, {1, 4}, {1, 5}, {1, 7}, {1, 8}, {1, 9}, {1, 11},
98  {1, 12}, {1, 13}, {1, 14}, {1, 18}, {1, 20}, {1, 22}, {1, 24},
99  {1, 25}, {1, 26}, {1, 27}, {1, 28}, {1, 29}, {1, 30}, {1, 31},
100  {1, 32}, {1, 33}, {1, 34}, {1, 39}, {1, 40}, {1, 42}, {1, 45},
101  {2, 1}, {2, 2}, {2, 13}, {2, 28}, {2, 29}, {2, 32}, {2, 33},
102  {2, 34}, {2, 38}, {2, 48}, {2, 49}, {2, 53}, {2, 55}, {2, 56},
103  {13, 2}, {13, 13}, {13, 26}, {13, 33}, {13, 34}, {13, 56}, {15, 27},
104  {28, 2}, {28, 48}, {28, 53}, {29, 2}, {29, 33}, {29, 56}, {31, 31},
105  {31, 33}, {31, 34}, {31, 49}, {32, 1}, {32, 2}, {32, 13}, {32, 15},
106  {32, 28}, {32, 29}, {32, 32}, {32, 33}, {32, 34}, {32, 39}, {32, 40},
107  {32, 48}, {32, 49}, {32, 53}, {32, 56}, {33, 1}, {33, 2}, {33, 32},
108  {33, 33}, {33, 34}, {33, 49}, {33, 53}, {33, 56}, {34, 1}, {34, 2},
109  {34, 32}, {34, 33}, {34, 34}, {34, 49}, {34, 53}, {34, 56}, {38, 34},
110  {39, 57}, {40, 34}, {47, 15}, {47, 49}, {48, 2}, {48, 34}, {48, 56},
111  {49, 1}, {49, 2}, {49, 28}, {49, 32}, {49, 33}, {49, 34}, {49, 39},
112  {49, 49}, {49, 56}, {53, 1}, {53, 2}, {53, 28}, {53, 34}, {53, 53},
113  {53, 57}, {55, 1}, {55, 28}, {55, 34}, {55, 53}, {55, 55}, {55, 56},
114  {56, 1}, {56, 2}, {56, 7}, {56, 13}, {56, 32}, {56, 33}, {56, 34},
115  {56, 49}, {56, 53}, {56, 56}, {56, 64}, {57, 34}, {57, 56}, {57, 57},
116  {64, 1}, {64, 64}, {65, 1}, {65, 65}}};
117 
118 // We have: 9 calculated features (the features here); 1 feature for each
119 // instruction opcode; and 1 feature for each manually-identified sequence.
120 // For the latter 2, we build a histogram: we count the number of
121 // occurrences of each instruction opcode or succession of instructions,
122 // respectively.
123 // Note that instruction opcodes start from 1. For convenience, we also have an
124 // always 0 feature for the '0' opcode, hence the extra 1.
125 const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount =
126  ImportantInstructionSuccessions.size() + getMaxInstructionID() + 1 +
127  IRToNativeSizeLearning::NumNamedFeatures;
128 
129 size_t getSize(Function &F, TargetTransformInfo &TTI) {
130  size_t Ret = 0;
131  for (const auto &BB : F)
132  for (const auto &I : BB)
134  &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize).getValue());
135  return Ret;
136 }
137 
138 size_t getSize(Function &F, FunctionAnalysisManager &FAM) {
139  auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
140  return getSize(F, TTI);
141 }
142 
143 unsigned getMaxDominatorTreeDepth(const Function &F,
144  const DominatorTree &Tree) {
145  unsigned Ret = 0;
146  for (const auto &BB : F)
147  if (const auto *TN = Tree.getNode(&BB))
148  Ret = std::max(Ret, TN->getLevel());
149  return Ret;
150 }
151 } // namespace
152 
153 IRToNativeSizeLearning::FunctionFeatures
154 IRToNativeSizeLearning::getFunctionFeatures(Function &F,
156  assert(llvm::is_sorted(ImportantInstructionSuccessions) &&
157  "expected function features are sorted");
158 
159  auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F);
160  FunctionFeatures FF;
161  size_t InstrCount = getMaxInstructionID() + 1;
162  FF.InstructionHistogram.resize(InstrCount);
163 
164  FF.InstructionPairHistogram.resize(ImportantInstructionSuccessions.size());
165 
166  int StartID = 0;
167  int LastID = StartID;
168  auto getPairIndex = [](size_t a, size_t b) {
169  auto I = llvm::find(ImportantInstructionSuccessions, std::make_pair(a, b));
170  if (I == ImportantInstructionSuccessions.end())
171  return -1;
172  return static_cast<int>(
173  std::distance(ImportantInstructionSuccessions.begin(), I));
174  };
175 
176  // We don't want debug calls, because they'd just add noise.
177  for (const auto &BB : F) {
178  for (const auto &I : BB.instructionsWithoutDebug()) {
179  auto ID = I.getOpcode();
180 
181  ++FF.InstructionHistogram[ID];
182  int PairIndex = getPairIndex(LastID, ID);
183  if (PairIndex >= 0)
184  ++FF.InstructionPairHistogram[PairIndex];
185  LastID = ID;
186  if (isa<CallBase>(I))
187  ++FF[NamedFeatureIndex::Calls];
188  }
189  }
190 
191  FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM);
192  FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage();
193  FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage();
194  FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage();
195  FF[NamedFeatureIndex::Blocks] =
196  std::distance(F.getBasicBlockList().begin(), F.getBasicBlockList().end());
197  auto &LI = FAM.getResult<LoopAnalysis>(F);
198  FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end());
199  for (auto &L : LI)
200  FF[NamedFeatureIndex::MaxLoopDepth] =
201  std::max(FF[NamedFeatureIndex::MaxLoopDepth],
202  static_cast<int32_t>(L->getLoopDepth()));
203  FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree);
204  return FF;
205 }
206 
207 void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const {
208  std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr);
209  Ptr += NamedFeatures.size();
210  std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr);
211  Ptr += InstructionHistogram.size();
212  std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(),
213  Ptr);
214 }
215 
217  return !TFIR2NativeModelPath.empty();
218 }
219 
221  if (!isEvaluatorRequested()) {
222  return;
223  }
224  std::vector<TensorSpec> InputSpecs{TensorSpec::createSpec<int32_t>(
225  "serving_default_input_1",
226  {1, static_cast<int64_t>(
227  IRToNativeSizeLearning::FunctionFeatures::FeatureCount)})};
228  std::vector<TensorSpec> OutputSpecs{
229  TensorSpec::createSpec<float>("StatefulPartitionedCall", {1})};
230  Evaluator = std::make_unique<TFModelEvaluator>(
231  TFIR2NativeModelPath.getValue().c_str(), InputSpecs, OutputSpecs);
232  if (!Evaluator || !Evaluator->isValid()) {
233  Evaluator.reset();
234  return;
235  }
236 }
237 
241  if (!Evaluator)
242  return None;
243  auto Features = IRToNativeSizeLearning::getFunctionFeatures(
244  const_cast<Function &>(F), FAM);
245  int32_t *V = Evaluator->getInput<int32_t>(0);
246  Features.fillTensor(V);
247  auto ER = Evaluator->evaluate();
248  if (!ER)
249  return None;
250  float Ret = *ER->getTensorValue<float>(0);
251  if (Ret < 0.0)
252  Ret = 0.0;
253  return static_cast<size_t>(Ret);
254 }
255 
260 
261 #else
262 namespace llvm {
264 } // namespace llvm
265 InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {}
272  return None;
273 }
275 #endif
276 
280  OS << "[InlineSizeEstimatorAnalysis] size estimate for " << F.getName()
281  << ": " << AM.getResult<InlineSizeEstimatorAnalysis>(F) << "\n";
282  return PreservedAnalyses::all();
283 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
TFUtils.h
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2320
llvm
Definition: AllocatorList.h:23
llvm::InlineSizeEstimatorAnalysis::isEvaluatorRequested
static bool isEvaluatorRequested()
Definition: InlineSizeEstimatorAnalysis.cpp:274
llvm::InstructionCost::getValue
Optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
Definition: InstructionCost.h:68
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:769
llvm::TFModelEvaluator
Definition: InlineSizeEstimatorAnalysis.cpp:263
llvm::Function
Definition: Function.h:61
Loops
Hexagon Hardware Loops
Definition: HexagonHardwareLoops.cpp:372
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
InstrCount
static unsigned InstrCount
Definition: DFAPacketizer.cpp:53
llvm::DominatorTreeBase::getNode
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Definition: GenericDomTree.h:351
llvm::Optional
Definition: APInt.h:33
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:116
F
#define F(x, y, z)
Definition: MD5.cpp:56
a
=0.0 ? 0.0 :(a > 0.0 ? 1.0 :-1.0) a
Definition: README.txt:489
CommandLine.h
llvm::InlineSizeEstimatorAnalysisPrinterPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: InlineSizeEstimatorAnalysis.cpp:278
b
the resulting code requires compare and branches when and if the revised code is with conditional branches instead of More there is a byte word extend before each where there should be only and the condition codes are not remembered when the same two values are compared twice More LSR enhancements i8 and i32 load store addressing modes are identical int b
Definition: README.txt:418
TargetLibraryInfo.h
llvm::None
const NoneType None
Definition: None.h:23
LoopInfo.h
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1422
InlineSizeEstimatorAnalysis.h
llvm::InlineSizeEstimatorAnalysis
Definition: InlineSizeEstimatorAnalysis.h:19
llvm::find
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1502
llvm::AnalysisKey
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: PassManager.h:72
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::Evaluator
This class evaluates LLVM IR, producing the Constant representing each SSA instruction.
Definition: Evaluator.h:38
llvm::InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis
~InlineSizeEstimatorAnalysis()
Definition: InlineSizeEstimatorAnalysis.cpp:268
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1540
MCAsmLayout.h
llvm::InlineSizeEstimatorAnalysis::Key
static AnalysisKey Key
Definition: InlineSizeEstimatorAnalysis.h:26
std
Definition: BitVector.h:838
llvm::InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis
InlineSizeEstimatorAnalysis()
Definition: InlineSizeEstimatorAnalysis.cpp:265
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
Casting.h
Function.h
PassManager.h
llvm::is_sorted
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition: STLExtras.h:1553
llvm::TargetTransformInfo::getInstructionCost
InstructionCost getInstructionCost(const Instruction *I, enum TargetCostKind kind) const
Query the cost of a specified instruction.
Definition: TargetTransformInfo.h:224
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:252
Instructions.h
Dominators.h
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
TargetTransformInfo.h
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::cl::desc
Definition: CommandLine.h:414
raw_ostream.h
copy
we should consider alternate ways to model stack dependencies Lots of things could be done in WebAssemblyTargetTransformInfo cpp there are numerous optimization related hooks that can be overridden in WebAssemblyTargetLowering Instead of the OptimizeReturned which should consider preserving the returned attribute through to MachineInstrs and extending the MemIntrinsicResults pass to do this optimization on calls too That would also let the WebAssemblyPeephole pass clean up dead defs for such as it does for stores Consider implementing and or getMachineCombinerPatterns Find a clean way to fix the problem which leads to the Shrink Wrapping pass being run after the WebAssembly PEI pass When setting multiple variables to the same we currently get code like const It could be done with a smaller encoding like local tee $pop5 local copy
Definition: README.txt:101
llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1233
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1169
llvm::InlineSizeEstimatorAnalysis::run
Result run(const Function &F, FunctionAnalysisManager &FAM)
Definition: InlineSizeEstimatorAnalysis.cpp:270
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38