//===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a model runner using TFLite, allowing the
// loading of a model from a command line option.
//
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
#if defined(LLVM_HAVE_TFLITE)

#include "llvm/ADT/BitVector.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/Analysis/ModelUnderTrainingRunner.h"
#include "llvm/Analysis/NoInferenceModelRunner.h"
#include "llvm/Analysis/Utils/TrainingLogger.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"

#include <optional>
#include <vector>

using namespace llvm;

static cl::opt<std::string> TrainingLog(
    "training-log", cl::Hidden,
    cl::desc("Path where the development-mode inlining log is saved."));

static cl::opt<std::string> TFModelUnderTrainingPath(
    "ml-inliner-model-under-training", cl::Hidden,
    cl::desc(R"(Path to SavedModel from the previous training iteration.
The directory is also expected to contain a JSON specification of the
outputs expected to be logged, where the first entry must be the
inlining decision. The file containing the specification should be
called output_spec.json. The expected JSON value is an array of
dictionaries. Each dictionary should have 2 keys:

- "tensor_spec", followed by the TensorSpec description of the
output; and
- "logging_name", a string indicating the name to use when
logging the output values.

Example:
[
  {
    "logging_name" : "some_name",
    "tensor_spec" : {
      "name" : "model_name",
      "port" : 0,
      "shape" : [2, 3],
      "type" : "float"
    }
  }
]

The first value must always correspond to the decision.)"));

static cl::opt<std::string> TFOutputSpecOverride(
    "ml-inliner-output-spec-override", cl::Hidden,
    cl::desc("Override the path to the output spec json file. See "
             "-ml-inliner-model-under-training documentation for the "
             "specification of that file."));

static cl::opt<std::string> TFFeedPrefix("ml-inliner-trained-model-feed-prefix",
                                         cl::Hidden, cl::init("action_"),
                                         cl::desc("Prefix for feature names."));

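// A typical development-mode invocation wires these flags together; the paths
// here are hypothetical. The advisor itself is selected with
// -enable-ml-inliner=development (see getDevelopmentModeAdvisor at the bottom
// of this file):
//   opt -passes=scc-oz-module-inliner -enable-ml-inliner=development \
//       -ml-inliner-model-under-training=/tmp/saved_model \
//       -training-log=/tmp/inliner.log input.bc -o /dev/null
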
namespace {
/// An InlineEvent, used by TrainingLogger.
struct InlineEvent {
  /// What the default policy's decision would have been.
  int64_t DefaultDecision = 0;

  /// What we advised. When training off the default policy, this is the
  /// same as DefaultDecision.
  int64_t AdvisedDecision = 0;

  /// What actually happened. This would be 'false' in the case of an inline
  /// error, even if AdvisedDecision were true; otherwise it agrees with
  /// AdvisedDecision.
  bool Effect = false;

  /// What the change in size was: size_after - size_before
  int64_t Reward = 0;
};

/// Collect data we may use for training a model.
class TrainingLogger final {
public:
  TrainingLogger(StringRef LogFileName, const ModelUnderTrainingRunner *MUTR,
                 const std::vector<TensorSpec> &FeatureMap);

  /// Log one inlining event.
  void logInlineEvent(const InlineEvent &Event,
                      const MLModelRunner &ModelRunner);

private:
  StringRef LogFileName;
  const ModelUnderTrainingRunner *const MUTR;
  const std::vector<TensorSpec> &FeatureMap;

  std::unique_ptr<Logger> L;
  BitVector Effects;
  /// Set these 2 clearly OOB, to make sure we set them later.
  size_t DefaultDecisionPos = std::numeric_limits<size_t>::max();
  size_t DecisionPos = std::numeric_limits<size_t>::max();
};

/// An extension of the MLInlineAdvisor for the 'development' mode, targeting
/// the offline training scenario. Note that training happens outside of the
/// compiler; this facility is concerned with producing training data ("logs").
/// This InlineAdvisor can operate in the following modes:
///
/// 1) collect logs for the default policy. This is useful for bootstrapping
/// training, which will be considerably faster by starting from a reasonable
/// policy.
///
/// 2) collect logs for the ML policy, using a model from a previous
/// training. Potentially, that model uses internally some small random
/// perturbation of its weights, to induce exploration (setting this up is the
/// responsibility of the training algorithm). The logs would then be used to
/// retrain and improve on this model.
///
/// 3) use the provided model, with no logging. This is useful for end-to-end
/// validation - the model, in this case, is a release candidate and shouldn't
/// have random perturbations. It is a convenience feature: rather than needing
/// to take the release candidate model and compile it in 'release' mode,
/// validate it, then potentially discard it, it's easier to just pass the model
/// to the compiler, albeit compilation would be slower, as a one-off. Once the
/// model behaves satisfactorily, it can be compiled AOT, for efficiency, in
/// release mode. The expectation is that a well-trained model provides a good
/// policy over a sufficiently diverse codebase, over many changes (i.e.
/// training happens seldom).
class DevelopmentModeMLInlineAdvisor : public MLInlineAdvisor {
public:
  DevelopmentModeMLInlineAdvisor(
      Module &M, ModuleAnalysisManager &MAM,
      std::function<
          std::unique_ptr<MLModelRunner>(const std::vector<TensorSpec> &)>
          GetModelRunner,
      std::function<bool(CallBase &)> GetDefaultAdvice);

  size_t getTotalSizeEstimate();

  void updateNativeSizeEstimate(int64_t Change) {
    *CurrentNativeSize += Change;
  }
  void resetNativeSize(Function *F) {
    PreservedAnalyses PA = PreservedAnalyses::all();
    PA.abandon<InlineSizeEstimatorAnalysis>();
    FAM.invalidate(*F, PA);
  }

  std::unique_ptr<MLInlineAdvice>
  getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE) override;

  std::optional<size_t> getNativeSizeEstimate(const Function &F) const;

private:
  bool isLogging() const { return !!Logger; }
  std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB) override;

  const bool IsDoingInference;
  std::unique_ptr<TrainingLogger> Logger;

  const std::optional<int32_t> InitialNativeSize;
  std::optional<int32_t> CurrentNativeSize;
};

/// A variant of MLInlineAdvice that tracks all non-trivial inlining
/// decisions, for training/logging.
class LoggingMLInlineAdvice : public MLInlineAdvice {
public:
  LoggingMLInlineAdvice(DevelopmentModeMLInlineAdvisor *Advisor, CallBase &CB,
                        OptimizationRemarkEmitter &ORE, bool Recommendation,
                        TrainingLogger &Logger,
                        std::optional<size_t> CallerSizeEstimateBefore,
                        std::optional<size_t> CalleeSizeEstimateBefore,
                        bool DefaultDecision, bool Mandatory = false)
      : MLInlineAdvice(Advisor, CB, ORE, Recommendation), Logger(Logger),
        CallerSizeEstimateBefore(CallerSizeEstimateBefore),
        CalleeSizeEstimateBefore(CalleeSizeEstimateBefore),
        DefaultDecision(DefaultDecision), Mandatory(Mandatory) {}

  virtual ~LoggingMLInlineAdvice() = default;

private:
  DevelopmentModeMLInlineAdvisor *getAdvisor() const {
    return static_cast<DevelopmentModeMLInlineAdvisor *>(Advisor);
  }
  void recordInliningImpl() override {
    MLInlineAdvice::recordInliningImpl();
    getAdvisor()->resetNativeSize(Caller);
    int Reward = std::numeric_limits<int>::max();
    if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
        !getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller) +
                            *CalleeSizeEstimateBefore;
      Reward = NativeSizeAfter -
               (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
    }
    log(Reward, /*Success=*/true);
  }

  void recordInliningWithCalleeDeletedImpl() override {
    MLInlineAdvice::recordInliningWithCalleeDeletedImpl();
    getAdvisor()->resetNativeSize(Caller);
    if (InlineSizeEstimatorAnalysis::isEvaluatorRequested() &&
        !getAdvisor()->isForcedToStop()) {
      int NativeSizeAfter = *getAdvisor()->getNativeSizeEstimate(*Caller);
      int Reward = NativeSizeAfter -
                   (*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
      getAdvisor()->updateNativeSizeEstimate(Reward);
      log(Reward, /*Success=*/true);
    } else {
      log(NoReward, /*Success=*/true);
    }
  }

  void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
    MLInlineAdvice::recordUnsuccessfulInliningImpl(Result);
    log(NoReward, /*Success=*/false);
  }

  void recordUnattemptedInliningImpl() override {
    MLInlineAdvice::recordUnattemptedInliningImpl();
    log(NoReward, /*Success=*/false);
  }

  void log(int64_t Reward, bool Success) {
    if (Mandatory)
      return;
    InlineEvent Event;
    Event.AdvisedDecision = isInliningRecommended();
    Event.DefaultDecision = DefaultDecision;
    Event.Effect = Success;
    Event.Reward = Reward;
    Logger.logInlineEvent(Event, getAdvisor()->getModelRunner());
  }

  static const int64_t NoReward = 0;
  TrainingLogger &Logger;
  const std::optional<size_t> CallerSizeEstimateBefore;
  const std::optional<size_t> CalleeSizeEstimateBefore;
  const int64_t DefaultDecision;
  const int64_t Mandatory;
};

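// Reward bookkeeping, worked through on hypothetical numbers: if the caller
// was estimated at 100 bytes and the callee at 40 before inlining, and the
// caller measures 120 afterwards (callee deleted), the logged reward is
// 120 - (100 + 40) = -20, i.e. a net native-size saving of 20 bytes.
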
static const std::vector<TensorSpec> TrainingOnlyFeatures{
    TensorSpec::createSpec<float>(TFFeedPrefix + "discount", {1}),
    TensorSpec::createSpec<float>(TFFeedPrefix + "reward", {1}),
    TensorSpec::createSpec<int32_t>(TFFeedPrefix + "step_type", {1})};

// Add TFFeedPrefix to the names and also add the "TrainingOnlyFeatures", which
// the model runner needs to see present. We don't set them ourselves or
// interact with them.
static const std::vector<TensorSpec>
convertInputFeatures(const std::vector<TensorSpec> &OriginalFeatures) {
  std::vector<TensorSpec> InputSpecs;
  for (const auto &Feature : OriginalFeatures)
    InputSpecs.push_back(TensorSpec(TFFeedPrefix + Feature.name(), Feature));
  append_range(InputSpecs, TrainingOnlyFeatures);
  return InputSpecs;
}
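
// For illustration (the feature name is only an example): with the default
// "action_" feed prefix, a feature named "callee_basic_block_count" is fed to
// the model under training as "action_callee_basic_block_count", with the
// three training-only tensors above appended at the end of the input spec.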

} // namespace

TrainingLogger::TrainingLogger(StringRef LogFileName,
                               const ModelUnderTrainingRunner *MUTR,
                               const std::vector<TensorSpec> &FeatureMap)
    : LogFileName(LogFileName), MUTR(MUTR), FeatureMap(FeatureMap) {
  // The first output is the inlining decision.
  std::vector<TensorSpec> FT(FeatureMap.begin(), FeatureMap.end());

  if (MUTR)
    append_range(FT, MUTR->extraOutputsForLoggingSpecs());

  DefaultDecisionPos = FT.size();
  FT.push_back(DefaultDecisionSpec);

  DecisionPos = FT.size();
  FT.push_back(InlineDecisionSpec);
  std::error_code EC;
  auto OS = std::make_unique<raw_fd_ostream>(TrainingLog, EC);
  if (EC)
    dbgs() << (EC.message() + ":" + TrainingLog);

  L = std::make_unique<Logger>(
      std::move(OS), FT, TensorSpec::createSpec<int64_t>(RewardName, {1}),
      InlineSizeEstimatorAnalysis::isEvaluatorRequested());
  L->switchContext("");
}

/// Log one inlining event.
void TrainingLogger::logInlineEvent(const InlineEvent &Event,
                                    const MLModelRunner &ModelRunner) {
  L->startObservation();
  size_t CurrentFeature = 0;
  for (; CurrentFeature < FeatureMap.size(); ++CurrentFeature)
    L->logTensorValue(CurrentFeature,
                      reinterpret_cast<const char *>(
                          ModelRunner.getTensorUntyped(CurrentFeature)));

  if (MUTR)
    for (size_t I = 0; I < MUTR->extraOutputsForLoggingSpecs().size(); ++I) {
      const char *RawData =
          reinterpret_cast<const char *>(MUTR->getUntypedExtraOutputValue(I));
      L->logTensorValue(CurrentFeature, RawData);
      ++CurrentFeature;
    }

  assert(CurrentFeature == DefaultDecisionPos);
  L->logTensorValue(DefaultDecisionPos,
                    reinterpret_cast<const char *>(&Event.DefaultDecision));
  L->logTensorValue(DecisionPos,
                    reinterpret_cast<const char *>(&Event.AdvisedDecision));
  L->endObservation();
  if (InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    L->logReward(Event.Reward);

  // For debugging / later use
  Effects.push_back(Event.Effect);
}
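
// Each observation row is thus laid out, in order, as: the FeatureMap tensors,
// any extra outputs the model under training asked to have logged, then the
// DefaultDecisionSpec and InlineDecisionSpec tensors; the scalar reward
// follows when the size estimator is available.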

DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::function<
        std::unique_ptr<MLModelRunner>(const std::vector<TensorSpec> &)>
        GetModelRunner,
    std::function<bool(CallBase &)> GetDefaultAdvice)
    : MLInlineAdvisor(M, MAM, GetModelRunner, GetDefaultAdvice),
      IsDoingInference(isa<ModelUnderTrainingRunner>(getModelRunner())),
      InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
      CurrentNativeSize(InitialNativeSize) {
  // We cannot have the case of neither inference nor logging.
  if (!TrainingLog.empty())
    Logger = std::make_unique<TrainingLogger>(
        TrainingLog, dyn_cast<ModelUnderTrainingRunner>(ModelRunner.get()),
        getFeatureMap());
  assert(IsDoingInference || isLogging());
}
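
// Mode recap: -training-log alone gives mode 1 (log the default policy);
// -training-log plus -ml-inliner-model-under-training gives mode 2 (log a
// model under training); the model path alone gives mode 3 (inference, no
// logging). The assert above rules out specifying neither.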

std::optional<size_t>
DevelopmentModeMLInlineAdvisor::getNativeSizeEstimate(const Function &F) const {
  if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    return std::nullopt;
  auto &R =
      FAM.getResult<InlineSizeEstimatorAnalysis>(const_cast<Function &>(F));
  if (!R) {
    F.getParent()->getContext().emitError(
        "Native size estimator is not present.");
    return 0;
  }
  return *R;
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/getCallerORE(CB), /*Recommendation=*/true,
      /*Logger=*/*Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/true, /*Mandatory*/ true);
}

std::unique_ptr<MLInlineAdvice>
DevelopmentModeMLInlineAdvisor::getAdviceFromModel(
    CallBase &CB, OptimizationRemarkEmitter &ORE) {
  if (IsDoingInference && !isLogging())
    return MLInlineAdvisor::getAdviceFromModel(CB, ORE);

  bool DefaultAdvice = GetDefaultAdvice(CB);
  auto Recommendation =
      IsDoingInference ? static_cast<bool>(ModelRunner->evaluate<int64_t>())
                       : DefaultAdvice;
  return std::make_unique<LoggingMLInlineAdvice>(
      /*Advisor=*/this,
      /*CB=*/CB, /*ORE=*/ORE, /*Recommendation=*/Recommendation,
      /*Logger=*/*Logger,
      /*CallerSizeEstimateBefore=*/getNativeSizeEstimate(*CB.getCaller()),
      /*CalleeSizeEstimateBefore=*/
      getNativeSizeEstimate(*CB.getCalledFunction()),
      /*DefaultDecision=*/DefaultAdvice);
}

size_t DevelopmentModeMLInlineAdvisor::getTotalSizeEstimate() {
  if (!InlineSizeEstimatorAnalysis::isEvaluatorRequested())
    return 0;
  size_t Ret = 0;
  for (auto &F : M) {
    if (F.isDeclaration())
      continue;
    Ret += *getNativeSizeEstimate(F);
  }
  return Ret;
}

std::unique_ptr<InlineAdvisor> llvm::getDevelopmentModeAdvisor(
    Module &M, ModuleAnalysisManager &MAM,
    std::function<bool(CallBase &)> GetDefaultAdvice) {
  auto &Ctx = M.getContext();
  auto RunnerFactory = [&](const std::vector<TensorSpec> &InputFeatures)
      -> std::unique_ptr<MLModelRunner> {
    std::unique_ptr<MLModelRunner> Runner;
    const std::vector<TensorSpec> ConvertedFeatures =
        convertInputFeatures(InputFeatures);
    if (TFModelUnderTrainingPath.empty())
      Runner.reset(new NoInferenceModelRunner(Ctx, ConvertedFeatures));
    else
      Runner = ModelUnderTrainingRunner::createAndEnsureValid(
          Ctx, TFModelUnderTrainingPath, DecisionName, ConvertedFeatures,
          TFOutputSpecOverride);
    if (!Runner)
      return nullptr;
    return Runner;
  };
  return std::make_unique<DevelopmentModeMLInlineAdvisor>(M, MAM, RunnerFactory,
                                                          GetDefaultAdvice);
}
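
// A minimal usage sketch, assuming a Module M and ModuleAnalysisManager MAM
// are already set up (the always-false lambda is a stand-in for a real
// default-policy query):
//   std::unique_ptr<InlineAdvisor> Advisor = getDevelopmentModeAdvisor(
//       M, MAM, [](CallBase &CB) { return false; });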
#endif // defined(LLVM_HAVE_TFLITE)