LLVM 14.0.0git
InlineSizeEstimatorAnalysis.cpp
//===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements feature and label extraction for offline supervised learning
// of an IR to native size model.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"

#ifdef LLVM_HAVE_TF_API
#include "llvm/Analysis/Utils/TFUtils.h"
#endif
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"

#include <algorithm>
#include <deque>
using namespace llvm;

AnalysisKey InlineSizeEstimatorAnalysis::Key;

#define DEBUG_TYPE "inline-size-estimator"

#ifdef LLVM_HAVE_TF_API
cl::opt<std::string> TFIR2NativeModelPath(
    "ml-inliner-ir2native-model", cl::Hidden,
    cl::desc("Path to saved model evaluating native size from IR."));

namespace {
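// Returns the largest instruction opcode value known to this LLVM build. The
// LAST_OTHER_INST entry of llvm/IR/Instruction.def is defined to expand to a
// plain `return NR;`, so including the .def file supplies the function body.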
unsigned getMaxInstructionID() {
#define LAST_OTHER_INST(NR) return NR;
#include "llvm/IR/Instruction.def"
}

class IRToNativeSizeLearning {
public:
  enum class NamedFeatureIndex : size_t {
    InitialSize,
    Blocks,
    Calls,
    IsLocal,
    IsLinkOnceODR,
    IsLinkOnce,
    Loops,
    MaxLoopDepth,
    MaxDomTreeLevel,

    NumNamedFeatures
  };
  static const size_t NumNamedFeatures =
      static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures);
  struct FunctionFeatures {
    static const size_t FeatureCount;

    std::array<int32_t, NumNamedFeatures> NamedFeatures = {0};
    std::vector<int32_t> InstructionHistogram;
    std::vector<int32_t> InstructionPairHistogram;

    void fillTensor(int32_t *Ptr) const;
    int32_t &operator[](NamedFeatureIndex Pos) {
      return NamedFeatures[static_cast<size_t>(Pos)];
    }
  };
  IRToNativeSizeLearning() = default;

  static FunctionFeatures getFunctionFeatures(Function &F,
                                              FunctionAnalysisManager &FAM);
};

// This is a point-in-time snapshot: we determined that including these pairs
// of consecutive instructions (in the IR layout available at inline time) as
// features improves the model performance. We want to move away from manual
// feature selection.
// The array is given in opcode pairs rather than labels because 1) labels
// weren't readily available, and 2) the successions were hand-extracted.
//
// This array must be sorted.
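// For reference, under the opcode numbering of llvm/IR/Instruction.def in this
// LLVM version, an entry such as {32, 33} counts a Load immediately followed
// by a Store in the IR layout (illustrative only; the pairs below reflect the
// opcode numbering that was current when the model was trained).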
static const std::array<std::pair<size_t, size_t>, 137>
    ImportantInstructionSuccessions{
        {{1, 1},   {1, 4},   {1, 5},   {1, 7},   {1, 8},   {1, 9},   {1, 11},
         {1, 12},  {1, 13},  {1, 14},  {1, 18},  {1, 20},  {1, 22},  {1, 24},
         {1, 25},  {1, 26},  {1, 27},  {1, 28},  {1, 29},  {1, 30},  {1, 31},
         {1, 32},  {1, 33},  {1, 34},  {1, 39},  {1, 40},  {1, 42},  {1, 45},
         {2, 1},   {2, 2},   {2, 13},  {2, 28},  {2, 29},  {2, 32},  {2, 33},
         {2, 34},  {2, 38},  {2, 48},  {2, 49},  {2, 53},  {2, 55},  {2, 56},
         {13, 2},  {13, 13}, {13, 26}, {13, 33}, {13, 34}, {13, 56}, {15, 27},
         {28, 2},  {28, 48}, {28, 53}, {29, 2},  {29, 33}, {29, 56}, {31, 31},
         {31, 33}, {31, 34}, {31, 49}, {32, 1},  {32, 2},  {32, 13}, {32, 15},
         {32, 28}, {32, 29}, {32, 32}, {32, 33}, {32, 34}, {32, 39}, {32, 40},
         {32, 48}, {32, 49}, {32, 53}, {32, 56}, {33, 1},  {33, 2},  {33, 32},
         {33, 33}, {33, 34}, {33, 49}, {33, 53}, {33, 56}, {34, 1},  {34, 2},
         {34, 32}, {34, 33}, {34, 34}, {34, 49}, {34, 53}, {34, 56}, {38, 34},
         {39, 57}, {40, 34}, {47, 15}, {47, 49}, {48, 2},  {48, 34}, {48, 56},
         {49, 1},  {49, 2},  {49, 28}, {49, 32}, {49, 33}, {49, 34}, {49, 39},
         {49, 49}, {49, 56}, {53, 1},  {53, 2},  {53, 28}, {53, 34}, {53, 53},
         {53, 57}, {55, 1},  {55, 28}, {55, 34}, {55, 53}, {55, 55}, {55, 56},
         {56, 1},  {56, 2},  {56, 7},  {56, 13}, {56, 32}, {56, 33}, {56, 34},
         {56, 49}, {56, 53}, {56, 56}, {56, 64}, {57, 34}, {57, 56}, {57, 57},
         {64, 1},  {64, 64}, {65, 1},  {65, 65}}};

// We have: 9 calculated features (the features here); 1 feature for each
// instruction opcode; and 1 feature for each manually-identified sequence.
// For the latter 2, we build a histogram: we count the number of
// occurrences of each instruction opcode or succession of instructions,
// respectively.
// Note that instruction opcodes start from 1. For convenience, we also have an
// always 0 feature for the '0' opcode, hence the extra 1.
const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount =
    ImportantInstructionSuccessions.size() + getMaxInstructionID() + 1 +
    IRToNativeSizeLearning::NumNamedFeatures;
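// That is, the flattened feature tensor holds
//   9 (named features) + getMaxInstructionID() + 1 (opcode histogram, with
//   slot 0 always zero) + 137 (instruction-pair histogram)
// int32 values.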

size_t getSize(Function &F, TargetTransformInfo &TTI) {
  size_t Ret = 0;
  for (const auto &BB : F)
    for (const auto &I : BB)
      Ret += *(TTI.getInstructionCost(
          &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize).getValue());
  return Ret;
}

size_t getSize(Function &F, FunctionAnalysisManager &FAM) {
  auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
  return getSize(F, TTI);
}

unsigned getMaxDominatorTreeDepth(const Function &F,
                                  const DominatorTree &Tree) {
  unsigned Ret = 0;
  for (const auto &BB : F)
    if (const auto *TN = Tree.getNode(&BB))
      Ret = std::max(Ret, TN->getLevel());
  return Ret;
}
} // namespace

IRToNativeSizeLearning::FunctionFeatures
IRToNativeSizeLearning::getFunctionFeatures(Function &F,
                                            FunctionAnalysisManager &FAM) {
  assert(llvm::is_sorted(ImportantInstructionSuccessions) &&
         "expected function features are sorted");

  auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F);
  FunctionFeatures FF;
  size_t InstrCount = getMaxInstructionID() + 1;
  FF.InstructionHistogram.resize(InstrCount);

  FF.InstructionPairHistogram.resize(ImportantInstructionSuccessions.size());

  int StartID = 0;
  int LastID = StartID;
  auto getPairIndex = [](size_t a, size_t b) {
    auto I = llvm::find(ImportantInstructionSuccessions, std::make_pair(a, b));
    if (I == ImportantInstructionSuccessions.end())
      return -1;
    return static_cast<int>(
        std::distance(ImportantInstructionSuccessions.begin(), I));
  };
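  // getPairIndex maps an (opcode, opcode) succession to its slot in
  // InstructionPairHistogram, or -1 if the pair is not tracked. For example,
  // {1, 1} is the first entry of ImportantInstructionSuccessions, so
  // getPairIndex(1, 1) returns 0.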

  // We don't want debug calls, because they'd just add noise.
  for (const auto &BB : F) {
    for (const auto &I : BB.instructionsWithoutDebug()) {
      auto ID = I.getOpcode();

      ++FF.InstructionHistogram[ID];
      int PairIndex = getPairIndex(LastID, ID);
      if (PairIndex >= 0)
        ++FF.InstructionPairHistogram[PairIndex];
      LastID = ID;
      if (isa<CallBase>(I))
        ++FF[NamedFeatureIndex::Calls];
    }
  }

  FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM);
  FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage();
  FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage();
  FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage();
  FF[NamedFeatureIndex::Blocks] =
      std::distance(F.getBasicBlockList().begin(), F.getBasicBlockList().end());
  auto &LI = FAM.getResult<LoopAnalysis>(F);
  FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end());
  for (auto &L : LI)
    FF[NamedFeatureIndex::MaxLoopDepth] =
        std::max(FF[NamedFeatureIndex::MaxLoopDepth],
                 static_cast<int32_t>(L->getLoopDepth()));
  FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree);
  return FF;
}

void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const {
  std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr);
  Ptr += NamedFeatures.size();
  std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr);
  Ptr += InstructionHistogram.size();
  std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(),
            Ptr);
}
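// Note: the resulting flat layout is the named features first, then the opcode
// histogram, then the instruction-pair histogram, matching FeatureCount above.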

bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() {
  return !TFIR2NativeModelPath.empty();
}

InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {
  if (!isEvaluatorRequested()) {
    return;
  }
  std::vector<TensorSpec> InputSpecs{TensorSpec::createSpec<int32_t>(
      "serving_default_input_1",
      {1, static_cast<int64_t>(
              IRToNativeSizeLearning::FunctionFeatures::FeatureCount)})};
  std::vector<TensorSpec> OutputSpecs{
      TensorSpec::createSpec<float>("StatefulPartitionedCall", {1})};
  Evaluator = std::make_unique<TFModelEvaluator>(
      TFIR2NativeModelPath.getValue().c_str(), InputSpecs, OutputSpecs);
  if (!Evaluator || !Evaluator->isValid()) {
    Evaluator.reset();
    return;
  }
}
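// The tensor names above ("serving_default_input_1", "StatefulPartitionedCall")
// are the names TensorFlow typically assigns to the serving signature's input
// and output when a model is exported as a SavedModel; the model pointed to by
// -ml-inliner-ir2native-model must export matching names and shapes.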

InlineSizeEstimatorAnalysis::Result
InlineSizeEstimatorAnalysis::run(const Function &F,
                                 FunctionAnalysisManager &FAM) {
  if (!Evaluator)
    return None;
  auto Features = IRToNativeSizeLearning::getFunctionFeatures(
      const_cast<Function &>(F), FAM);
  int32_t *V = Evaluator->getInput<int32_t>(0);
  Features.fillTensor(V);
  auto ER = Evaluator->evaluate();
  if (!ER)
    return None;
  float Ret = *ER->getTensorValue<float>(0);
  if (Ret < 0.0)
    Ret = 0.0;
  return static_cast<size_t>(Ret);
}

InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis(
    InlineSizeEstimatorAnalysis &&Other)
    : Evaluator(std::move(Other.Evaluator)) {}

InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {}

#else
namespace llvm {
class TFModelEvaluator {};
} // namespace llvm
InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {}
InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis(
    InlineSizeEstimatorAnalysis &&) {}
InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {}
InlineSizeEstimatorAnalysis::Result
InlineSizeEstimatorAnalysis::run(const Function &F,
                                 FunctionAnalysisManager &FAM) {
  return None;
}
bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; }
#endif

PreservedAnalyses
InlineSizeEstimatorAnalysisPrinterPass::run(Function &F,
                                            FunctionAnalysisManager &AM) {
  OS << "[InlineSizeEstimatorAnalysis] size estimate for " << F.getName()
     << ": " << AM.getResult<InlineSizeEstimatorAnalysis>(F) << "\n";
  return PreservedAnalyses::all();
}
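// Usage sketch (hypothetical invocation; the exact pass name is whatever this
// printer is registered as in PassRegistry.def, and LLVM must be built with
// TensorFlow support, i.e. LLVM_HAVE_TF_API, for a real estimate):
//   opt -passes='print<inline-size-estimator>' \
//       -ml-inliner-ir2native-model=<path-to-saved-model> -disable-output in.ll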