LLVM 22.0.0git
IR2Vec.h
Go to the documentation of this file.
1//===- IR2Vec.h - Implementation of IR2Vec ----------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM
4// Exceptions. See the LICENSE file for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file defines the IR2Vec vocabulary analysis (IR2VecVocabAnalysis),
11/// the core ir2vec::Embedder interface for generating IR embeddings,
12/// and related utilities like the IR2VecPrinterPass.
13///
14/// Program embeddings are typically a learned representation of the program,
15/// or are derived from one. Such embeddings are used to represent the
16/// programs as input to machine learning algorithms. IR2Vec represents the
17/// LLVM IR as embeddings.
18///
19/// The IR2Vec algorithm is described in the following paper:
20///
21/// IR2Vec: LLVM IR Based Scalable Program Embeddings, S. VenkataKeerthy,
22/// Rohit Aggarwal, Shalini Jain, Maunendra Sankar Desarkar, Ramakrishna
23/// Upadrasta, and Y. N. Srikant, ACM Transactions on Architecture and
24/// Code Optimization (TACO), 2020. https://doi.org/10.1145/3418463.
25/// https://arxiv.org/abs/1909.06228
26///
27/// To obtain embeddings:
28/// First run IR2VecVocabAnalysis to populate the vocabulary.
29/// Then, use the Embedder interface to generate embeddings for the desired IR
30/// entities. See the documentation for more details -
31/// https://llvm.org/docs/MLGO.html#ir2vec-embeddings
32///
33//===----------------------------------------------------------------------===//
34
35#ifndef LLVM_ANALYSIS_IR2VEC_H
36#define LLVM_ANALYSIS_IR2VEC_H
37
38#include "llvm/ADT/DenseMap.h"
40#include "llvm/IR/PassManager.h"
41#include "llvm/IR/Type.h"
45#include "llvm/Support/JSON.h"
46#include <array>
47#include <map>
48#include <optional>
49
50namespace llvm {
51
52class Module;
53class BasicBlock;
54class Instruction;
55class Function;
56class Value;
57class raw_ostream;
58class LLVMContext;
60
61/// IR2Vec computes two kinds of embeddings: Symbolic and Flow-aware.
62/// Symbolic embeddings capture the "syntactic" and "statistical correlation"
63/// of the IR entities. Flow-aware embeddings build on top of symbolic
64/// embeddings and additionally capture the flow information in the IR.
65/// IR2VecKind is used to specify the type of embeddings to generate.
66/// Note: The implementation of FlowAware embeddings is not the same as the one
67/// described in the paper. The current implementation is a simplified version
68/// that captures the flow information (SSA-based use-defs) without tracing
69/// through memory level use-defs in the embedding computation described in the
70/// paper.
72
73namespace ir2vec {
74
80
81/// Embedding is a datatype that wraps std::vector<double>. It provides
82/// additional functionality for arithmetic and comparison operations.
83/// It is meant to be used *like* std::vector<double> but is more restrictive
84/// in the sense that it does not allow the user to change the size of the
85/// embedding vector. The dimension of the embedding is fixed at the time of
86/// construction of Embedding object. But the elements can be modified in-place.
87struct Embedding {
88private:
89 std::vector<double> Data;
90
91public:
92 Embedding() = default;
93 Embedding(const std::vector<double> &V) : Data(V) {}
94 Embedding(std::vector<double> &&V) : Data(std::move(V)) {}
95 Embedding(std::initializer_list<double> IL) : Data(IL) {}
96
97 explicit Embedding(size_t Size) : Data(Size, 0.0) {}
98 Embedding(size_t Size, double InitialValue) : Data(Size, InitialValue) {}
99
100 size_t size() const { return Data.size(); }
101 bool empty() const { return Data.empty(); }
102
103 double &operator[](size_t Itr) {
104 assert(Itr < Data.size() && "Index out of bounds");
105 return Data[Itr];
106 }
107
108 const double &operator[](size_t Itr) const {
109 assert(Itr < Data.size() && "Index out of bounds");
110 return Data[Itr];
111 }
112
113 using iterator = typename std::vector<double>::iterator;
114 using const_iterator = typename std::vector<double>::const_iterator;
115
116 iterator begin() { return Data.begin(); }
117 iterator end() { return Data.end(); }
118 const_iterator begin() const { return Data.begin(); }
119 const_iterator end() const { return Data.end(); }
120 const_iterator cbegin() const { return Data.cbegin(); }
121 const_iterator cend() const { return Data.cend(); }
122
123 const std::vector<double> &getData() const { return Data; }
124
125 /// Arithmetic operators
130 LLVM_ABI Embedding &operator*=(double Factor);
131 LLVM_ABI Embedding operator*(double Factor) const;
132
133 /// Adds Src Embedding scaled by Factor with the called Embedding.
134 /// Called_Embedding += Src * Factor
135 LLVM_ABI Embedding &scaleAndAdd(const Embedding &Src, float Factor);
136
137 /// Returns true if the embedding is approximately equal to the RHS embedding
138 /// within the specified tolerance.
140 double Tolerance = 1e-4) const;
141
142 LLVM_ABI void print(raw_ostream &OS) const;
143};
144
147
148/// Generic storage class for section-based vocabularies.
149/// VocabStorage provides a generic foundation for storing and accessing
150/// embeddings organized into sections.
152private:
153 /// Section-based storage
154 std::vector<std::vector<Embedding>> Sections;
155
156 const size_t TotalSize;
157 const unsigned Dimension;
158
159public:
160 /// Default constructor creates empty storage (invalid state)
161 VocabStorage() : Sections(), TotalSize(0), Dimension(0) {}
162
163 /// Create a VocabStorage with pre-organized section data
164 VocabStorage(std::vector<std::vector<Embedding>> &&SectionData);
165
168
169 VocabStorage(const VocabStorage &) = delete;
171
172 /// Get total number of entries across all sections
173 size_t size() const { return TotalSize; }
174
175 /// Get number of sections
176 unsigned getNumSections() const {
177 return static_cast<unsigned>(Sections.size());
178 }
179
180 /// Section-based access: Storage[sectionId][localIndex]
181 const std::vector<Embedding> &operator[](unsigned SectionId) const {
182 assert(SectionId < Sections.size() && "Invalid section ID");
183 return Sections[SectionId];
184 }
185
186 /// Get vocabulary dimension
187 unsigned getDimension() const { return Dimension; }
188
189 /// Check if vocabulary is valid (has data)
190 bool isValid() const { return TotalSize > 0; }
191
192 /// Iterator support for section-based access
194 const VocabStorage *Storage;
195 unsigned SectionId = 0;
196 size_t LocalIndex = 0;
197
198 public:
199 const_iterator(const VocabStorage *Storage, unsigned SectionId,
200 size_t LocalIndex)
201 : Storage(Storage), SectionId(SectionId), LocalIndex(LocalIndex) {}
202
203 LLVM_ABI const Embedding &operator*() const;
205 LLVM_ABI bool operator==(const const_iterator &Other) const;
206 LLVM_ABI bool operator!=(const const_iterator &Other) const;
207 };
208
209 const_iterator begin() const { return const_iterator(this, 0, 0); }
211 return const_iterator(this, getNumSections(), 0);
212 }
213};
214
215/// Class for storing and accessing the IR2Vec vocabulary.
216/// The Vocabulary class manages seed embeddings for LLVM IR entities. The
217/// seed embeddings are the initial learned representations of the entities
218/// of LLVM IR. The IR2Vec representation for a given IR is derived from these
219/// seed embeddings.
220///
221/// The vocabulary contains the seed embeddings for three types of entities:
222/// instruction opcodes, types, and operands. Types are grouped/canonicalized
223/// for better learning (e.g., all float variants map to FloatTy). The
224/// vocabulary abstracts away the canonicalization effectively; the exposed APIs
225/// handle all the known LLVM IR opcodes, types and operands.
226///
227/// This class helps populate the seed embeddings in an internal vector-based
228/// ADT. It provides logic to map every IR entity to a specific slot index or
229/// position in this vector, enabling O(1) embedding lookup while avoiding
230/// unnecessary computations involving string based lookups while generating the
231/// embeddings.
234
235 // Vocabulary Layout:
236 // +----------------+------------------------------------------------------+
237 // | Entity Type | Index Range |
238 // +----------------+------------------------------------------------------+
239 // | Opcodes | [0 .. (MaxOpcodes-1)] |
240 // | Canonical Types| [MaxOpcodes .. (MaxOpcodes+MaxCanonicalTypeIDs-1)] |
241 // | Operands | [OperandBaseOffset .. (PredicateBaseOffset-1)] |
242 // | Predicates | [PredicateBaseOffset .. (NumCanonicalEntries-1)] |
243 // +----------------+------------------------------------------------------+
244 // Note: MaxOpcodes is the number of unique opcodes supported by LLVM IR.
245 // MaxCanonicalTypeIDs is the number of canonicalized type IDs. "Similar"
246 // LLVM Types are grouped/canonicalized together. E.g., all float variants
247 // (FloatTy, DoubleTy, HalfTy, etc.) map to CanonicalTypeID::FloatTy. This
248 // helps reduce the vocabulary size and improves learning. Comparison
249 // predicates (ICmp/FCmp) are stored in their own section, placed after the
250 // operand kinds. This can be extended to include other specializations in future.
251 enum class Section : unsigned {
252 Opcodes = 0,
253 CanonicalTypes = 1,
254 Operands = 2,
255 Predicates = 3,
256 MaxSections
257 };
258
259 // Use section-based storage for better organization and efficiency
260 VocabStorage Storage;
261
262 static constexpr unsigned NumICmpPredicates =
263 static_cast<unsigned>(CmpInst::LAST_ICMP_PREDICATE) -
264 static_cast<unsigned>(CmpInst::FIRST_ICMP_PREDICATE) + 1;
265 static constexpr unsigned NumFCmpPredicates =
266 static_cast<unsigned>(CmpInst::LAST_FCMP_PREDICATE) -
267 static_cast<unsigned>(CmpInst::FIRST_FCMP_PREDICATE) + 1;
268
269public:
270 /// Canonical type IDs supported by IR2Vec Vocabulary
286
287 /// Operand kinds supported by IR2Vec Vocabulary
295
296 /// Vocabulary layout constants
297#define LAST_OTHER_INST(NUM) static constexpr unsigned MaxOpcodes = NUM;
298#include "llvm/IR/Instruction.def"
299#undef LAST_OTHER_INST
300
301 static constexpr unsigned MaxTypeIDs = Type::TypeID::TargetExtTyID + 1;
302 static constexpr unsigned MaxCanonicalTypeIDs =
303 static_cast<unsigned>(CanonicalTypeID::MaxCanonicalType);
304 static constexpr unsigned MaxOperandKinds =
305 static_cast<unsigned>(OperandKind::MaxOperandKind);
306 // CmpInst::Predicate has gaps. We want the vocabulary to be dense without
307 // empty slots.
308 static constexpr unsigned MaxPredicateKinds =
309 NumICmpPredicates + NumFCmpPredicates;
310
311 Vocabulary() = default;
312 LLVM_ABI Vocabulary(VocabStorage &&Storage) : Storage(std::move(Storage)) {}
313
314 Vocabulary(const Vocabulary &) = delete;
315 Vocabulary &operator=(const Vocabulary &) = delete;
316
317 Vocabulary(Vocabulary &&) = default;
319
320 LLVM_ABI bool isValid() const {
321 return Storage.size() == NumCanonicalEntries;
322 }
323
324 LLVM_ABI unsigned getDimension() const {
325 assert(isValid() && "IR2Vec Vocabulary is invalid");
326 return Storage.getDimension();
327 }
328
329 /// Total number of entries (opcodes + canonicalized types + operand kinds +
330 /// predicates)
331 static constexpr size_t getCanonicalSize() { return NumCanonicalEntries; }
332
333 /// Function to get vocabulary key for a given Opcode
334 LLVM_ABI static StringRef getVocabKeyForOpcode(unsigned Opcode);
335
336 /// Function to get vocabulary key for a given TypeID
338 return getVocabKeyForCanonicalTypeID(getCanonicalTypeID(TypeID));
339 }
340
341 /// Function to get vocabulary key for a given OperandKind
343 unsigned Index = static_cast<unsigned>(Kind);
344 assert(Index < MaxOperandKinds && "Invalid OperandKind");
345 return OperandKindNames[Index];
346 }
347
348 /// Function to classify an operand into OperandKind
350
351 /// Function to get vocabulary key for a given predicate
353
354 /// Functions to return flat index
355 LLVM_ABI static unsigned getIndex(unsigned Opcode) {
356 assert(Opcode >= 1 && Opcode <= MaxOpcodes && "Invalid opcode");
357 return Opcode - 1; // Convert to zero-based index
358 }
359
361 assert(static_cast<unsigned>(TypeID) < MaxTypeIDs && "Invalid type ID");
362 return MaxOpcodes + static_cast<unsigned>(getCanonicalTypeID(TypeID));
363 }
364
365 LLVM_ABI static unsigned getIndex(const Value &Op) {
366 unsigned Index = static_cast<unsigned>(getOperandKind(&Op));
367 assert(Index < MaxOperandKinds && "Invalid OperandKind");
368 return OperandBaseOffset + Index;
369 }
370
372 return PredicateBaseOffset + getPredicateLocalIndex(P);
373 }
374
375 /// Accessors to get the embedding for a given entity.
376 LLVM_ABI const ir2vec::Embedding &operator[](unsigned Opcode) const {
377 assert(Opcode >= 1 && Opcode <= MaxOpcodes && "Invalid opcode");
378 return Storage[static_cast<unsigned>(Section::Opcodes)][Opcode - 1];
379 }
380
382 assert(static_cast<unsigned>(TypeID) < MaxTypeIDs && "Invalid type ID");
383 unsigned LocalIndex = static_cast<unsigned>(getCanonicalTypeID(TypeID));
384 return Storage[static_cast<unsigned>(Section::CanonicalTypes)][LocalIndex];
385 }
386
387 LLVM_ABI const ir2vec::Embedding &operator[](const Value &Arg) const {
388 unsigned LocalIndex = static_cast<unsigned>(getOperandKind(&Arg));
389 assert(LocalIndex < MaxOperandKinds && "Invalid OperandKind");
390 return Storage[static_cast<unsigned>(Section::Operands)][LocalIndex];
391 }
392
394 unsigned LocalIndex = getPredicateLocalIndex(P);
395 return Storage[static_cast<unsigned>(Section::Predicates)][LocalIndex];
396 }
397
398 /// Const Iterator type aliases
400
402 assert(isValid() && "IR2Vec Vocabulary is invalid");
403 return Storage.begin();
404 }
405
406 const_iterator cbegin() const { return begin(); }
407
409 assert(isValid() && "IR2Vec Vocabulary is invalid");
410 return Storage.end();
411 }
412
413 const_iterator cend() const { return end(); }
414
415 /// Returns the string key for a given index position in the vocabulary.
416 /// This is useful for debugging or printing the vocabulary. Do not use this
417 /// for embedding generation as string based lookups are inefficient.
418 LLVM_ABI static StringRef getStringKey(unsigned Pos);
419
420 /// Create a dummy vocabulary for testing purposes.
421 LLVM_ABI static VocabStorage createDummyVocabForTest(unsigned Dim = 1);
422
423 LLVM_ABI bool invalidate(Module &M, const PreservedAnalyses &PA,
424 ModuleAnalysisManager::Invalidator &Inv) const;
425
426private:
427 constexpr static unsigned NumCanonicalEntries =
429
430 // Base offsets for flat index computation
431 constexpr static unsigned OperandBaseOffset =
432 MaxOpcodes + MaxCanonicalTypeIDs;
433 constexpr static unsigned PredicateBaseOffset =
434 OperandBaseOffset + MaxOperandKinds;
435
436 /// Functions for predicate index calculations
437 static unsigned getPredicateLocalIndex(CmpInst::Predicate P);
438 static CmpInst::Predicate getPredicateFromLocalIndex(unsigned LocalIndex);
439
440 /// String mappings for CanonicalTypeID values
441 static constexpr StringLiteral CanonicalTypeNames[] = {
442 "FloatTy", "VoidTy", "LabelTy", "MetadataTy",
443 "VectorTy", "TokenTy", "IntegerTy", "FunctionTy",
444 "PointerTy", "StructTy", "ArrayTy", "UnknownTy"};
445 static_assert(std::size(CanonicalTypeNames) ==
446 static_cast<unsigned>(CanonicalTypeID::MaxCanonicalType),
447 "CanonicalTypeNames array size must match MaxCanonicalType");
448
449 /// String mappings for OperandKind values
450 static constexpr StringLiteral OperandKindNames[] = {"Function", "Pointer",
451 "Constant", "Variable"};
452 static_assert(std::size(OperandKindNames) ==
453 static_cast<unsigned>(OperandKind::MaxOperandKind),
454 "OperandKindNames array size must match MaxOperandKind");
455
456 /// Every known TypeID defined in llvm/IR/Type.h is expected to have a
457 /// corresponding mapping here in the same order as enum Type::TypeID.
458 static constexpr std::array<CanonicalTypeID, MaxTypeIDs> TypeIDMapping = {{
459 CanonicalTypeID::FloatTy, // HalfTyID = 0
460 CanonicalTypeID::FloatTy, // BFloatTyID
461 CanonicalTypeID::FloatTy, // FloatTyID
462 CanonicalTypeID::FloatTy, // DoubleTyID
463 CanonicalTypeID::FloatTy, // X86_FP80TyID
464 CanonicalTypeID::FloatTy, // FP128TyID
465 CanonicalTypeID::FloatTy, // PPC_FP128TyID
466 CanonicalTypeID::VoidTy, // VoidTyID
467 CanonicalTypeID::LabelTy, // LabelTyID
468 CanonicalTypeID::MetadataTy, // MetadataTyID
469 CanonicalTypeID::VectorTy, // X86_AMXTyID
470 CanonicalTypeID::TokenTy, // TokenTyID
471 CanonicalTypeID::IntegerTy, // IntegerTyID
472 CanonicalTypeID::FunctionTy, // FunctionTyID
473 CanonicalTypeID::PointerTy, // PointerTyID
474 CanonicalTypeID::StructTy, // StructTyID
475 CanonicalTypeID::ArrayTy, // ArrayTyID
476 CanonicalTypeID::VectorTy, // FixedVectorTyID
477 CanonicalTypeID::VectorTy, // ScalableVectorTyID
478 CanonicalTypeID::PointerTy, // TypedPointerTyID
479 CanonicalTypeID::UnknownTy // TargetExtTyID
480 }};
481 static_assert(TypeIDMapping.size() == MaxTypeIDs,
482 "TypeIDMapping must cover all Type::TypeID values");
483
484 /// Function to get vocabulary key for canonical type by enum
485 LLVM_ABI static StringRef
486 getVocabKeyForCanonicalTypeID(CanonicalTypeID CType) {
487 unsigned Index = static_cast<unsigned>(CType);
488 assert(Index < MaxCanonicalTypeIDs && "Invalid CanonicalTypeID");
489 return CanonicalTypeNames[Index];
490 }
491
492 /// Function to convert TypeID to CanonicalTypeID
493 LLVM_ABI static CanonicalTypeID getCanonicalTypeID(Type::TypeID TypeID) {
494 unsigned Index = static_cast<unsigned>(TypeID);
495 assert(Index < MaxTypeIDs && "Invalid TypeID");
496 return TypeIDMapping[Index];
497 }
498
499 /// Function to get the predicate enum value for a given index. Index is
500 /// relative to the predicates section of the vocabulary. E.g., Index 0
501 /// corresponds to the first predicate.
502 LLVM_ABI static CmpInst::Predicate getPredicate(unsigned Index) {
503 assert(Index < MaxPredicateKinds && "Invalid predicate index");
504 return getPredicateFromLocalIndex(Index);
505 }
506};
507
508/// Embedder provides the interface to generate embeddings (vector
509/// representations) for instructions, basic blocks, and functions. The
510/// vector representations are generated using IR2Vec algorithms.
511///
512/// The Embedder class is an abstract class and it is intended to be
513/// subclassed for different IR2Vec algorithms like Symbolic and Flow-aware.
514class Embedder {
515protected:
516 const Function &F;
518
519 /// Dimension of the vector representation; captured from the input vocabulary
520 const unsigned Dimension;
521
522 /// Weights for different entities (like opcode, arguments, types)
523 /// in the IR instructions to generate the vector representation.
525
526 // Utility maps - these are used to store the vector representations of
527 // instructions, basic blocks and functions.
531
532 LLVM_ABI Embedder(const Function &F, const Vocabulary &Vocab);
533
534 /// Function to compute embeddings. It generates embeddings for all
535 /// the instructions and basic blocks in the function F.
536 void computeEmbeddings() const;
537
538 /// Function to compute the embedding for a given basic block.
539 /// Specific to the kind of embeddings being computed.
540 virtual void computeEmbeddings(const BasicBlock &BB) const = 0;
541
542public:
543 virtual ~Embedder() = default;
544
545 /// Factory method to create an Embedder object.
546 LLVM_ABI static std::unique_ptr<Embedder>
548
549 /// Returns a map containing instructions and the corresponding embeddings for
550 /// the function F if it has been computed. If not, it computes the embeddings
551 /// for the function and returns the map.
553
554 /// Returns a map containing basic block and the corresponding embeddings for
555 /// the function F if it has been computed. If not, it computes the embeddings
556 /// for the function and returns the map.
557 LLVM_ABI const BBEmbeddingsMap &getBBVecMap() const;
558
559 /// Returns the embedding for a given basic block in the function F if it has
560 /// been computed. If not, it computes the embedding for the basic block and
561 /// returns it.
562 LLVM_ABI const Embedding &getBBVector(const BasicBlock &BB) const;
563
564 /// Computes and returns the embedding for the current function.
565 LLVM_ABI const Embedding &getFunctionVector() const;
566};
567
568/// Class for computing the Symbolic embeddings of IR2Vec.
569/// Symbolic embeddings are constructed based on the entity-level
570/// representations obtained from the Vocabulary.
572private:
573 void computeEmbeddings(const BasicBlock &BB) const override;
574
575public:
578};
579
580/// Class for computing the Flow-aware embeddings of IR2Vec.
581/// Flow-aware embeddings build on the vocabulary, just like Symbolic
582/// embeddings, and additionally capture the flow information in the IR.
584private:
585 void computeEmbeddings(const BasicBlock &BB) const override;
586
587public:
590};
591
592} // namespace ir2vec
593
594/// This analysis provides the vocabulary for IR2Vec. The vocabulary provides a
595/// mapping between an entity of the IR (like opcode, type, argument, etc.) and
596/// its corresponding embedding.
597class IR2VecVocabAnalysis : public AnalysisInfoMixin<IR2VecVocabAnalysis> {
598 using VocabMap = std::map<std::string, ir2vec::Embedding>;
599 std::optional<ir2vec::VocabStorage> Vocab;
600
601 Error readVocabulary(VocabMap &OpcVocab, VocabMap &TypeVocab,
602 VocabMap &ArgVocab);
603 Error parseVocabSection(StringRef Key, const json::Value &ParsedVocabValue,
604 VocabMap &TargetVocab, unsigned &Dim);
605 void generateVocabStorage(VocabMap &OpcVocab, VocabMap &TypeVocab,
606 VocabMap &ArgVocab);
607 void emitError(Error Err, LLVMContext &Ctx);
608
609public:
613 : Vocab(std::move(Vocab)) {}
616};
617
618/// This pass prints the IR2Vec embeddings for instructions, basic blocks, and
619/// functions.
620class IR2VecPrinterPass : public PassInfoMixin<IR2VecPrinterPass> {
621 raw_ostream &OS;
622
623public:
624 explicit IR2VecPrinterPass(raw_ostream &OS) : OS(OS) {}
626 static bool isRequired() { return true; }
627};
628
629/// This pass prints the embeddings in the vocabulary
630class IR2VecVocabPrinterPass : public PassInfoMixin<IR2VecVocabPrinterPass> {
631 raw_ostream &OS;
632
633public:
634 explicit IR2VecVocabPrinterPass(raw_ostream &OS) : OS(OS) {}
636 static bool isRequired() { return true; }
637};
638
639} // namespace llvm
640
641#endif // LLVM_ANALYSIS_IR2VEC_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define LLVM_ABI
Definition Compiler.h:213
This file defines the DenseMap class.
Provides ErrorOr<T> smart pointer.
This header defines various interfaces for pass management in LLVM.
This file supports working with JSON data.
Type::TypeID TypeID
#define P(N)
ModuleAnalysisManager MAM
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Value * RHS
LLVM Basic Block Representation.
Definition BasicBlock.h:62
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:678
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
IR2VecPrinterPass(raw_ostream &OS)
Definition IR2Vec.h:624
static bool isRequired()
Definition IR2Vec.h:626
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
Definition IR2Vec.cpp:668
This analysis provides the vocabulary for IR2Vec.
Definition IR2Vec.h:597
ir2vec::Vocabulary Result
Definition IR2Vec.h:614
LLVM_ABI Result run(Module &M, ModuleAnalysisManager &MAM)
Definition IR2Vec.cpp:629
LLVM_ABI IR2VecVocabAnalysis(ir2vec::VocabStorage &&Vocab)
Definition IR2Vec.h:612
static LLVM_ABI AnalysisKey Key
Definition IR2Vec.h:610
static bool isRequired()
Definition IR2Vec.h:636
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
Definition IR2Vec.cpp:710
IR2VecVocabPrinterPass(raw_ostream &OS)
Definition IR2Vec.h:634
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:854
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
TypeID
Definitions of all of the base types for the Type system.
Definition Type.h:54
LLVM Value Representation.
Definition Value.h:75
LLVM_ABI const Embedding & getBBVector(const BasicBlock &BB) const
Returns the embedding for a given basic block in the function F if it has been computed.
Definition IR2Vec.cpp:184
static LLVM_ABI std::unique_ptr< Embedder > create(IR2VecKind Mode, const Function &F, const Vocabulary &Vocab)
Factory method to create an Embedder object.
Definition IR2Vec.cpp:161
BBEmbeddingsMap BBVecMap
Definition IR2Vec.h:529
LLVM_ABI const BBEmbeddingsMap & getBBVecMap() const
Returns a map containing basic block and the corresponding embeddings for the function F if it has be...
Definition IR2Vec.cpp:178
const Vocabulary & Vocab
Definition IR2Vec.h:517
void computeEmbeddings() const
Function to compute embeddings.
Definition IR2Vec.cpp:199
virtual ~Embedder()=default
const float TypeWeight
Definition IR2Vec.h:524
LLVM_ABI const InstEmbeddingsMap & getInstVecMap() const
Returns a map containing instructions and the corresponding embeddings for the function F if it has b...
Definition IR2Vec.cpp:172
const float OpcWeight
Weights for different entities (like opcode, arguments, types) in the IR instructions to generate the...
Definition IR2Vec.h:524
const unsigned Dimension
Dimension of the vector representation; captured from the input vocabulary.
Definition IR2Vec.h:520
LLVM_ABI Embedder(const Function &F, const Vocabulary &Vocab)
Definition IR2Vec.cpp:156
const float ArgWeight
Definition IR2Vec.h:524
Embedding FuncVector
Definition IR2Vec.h:528
virtual void computeEmbeddings(const BasicBlock &BB) const =0
Function to compute the embedding for a given basic block.
LLVM_ABI const Embedding & getFunctionVector() const
Computes and returns the embedding for the current function.
Definition IR2Vec.cpp:192
InstEmbeddingsMap InstVecMap
Definition IR2Vec.h:530
const Function & F
Definition IR2Vec.h:516
FlowAwareEmbedder(const Function &F, const Vocabulary &Vocab)
Definition IR2Vec.h:588
SymbolicEmbedder(const Function &F, const Vocabulary &Vocab)
Definition IR2Vec.h:576
Iterator support for section-based access.
Definition IR2Vec.h:193
const_iterator(const VocabStorage *Storage, unsigned SectionId, size_t LocalIndex)
Definition IR2Vec.h:199
LLVM_ABI bool operator!=(const const_iterator &Other) const
Definition IR2Vec.cpp:328
LLVM_ABI const_iterator & operator++()
Definition IR2Vec.cpp:309
LLVM_ABI const Embedding & operator*() const
Definition IR2Vec.cpp:302
LLVM_ABI bool operator==(const const_iterator &Other) const
Definition IR2Vec.cpp:322
Generic storage class for section-based vocabularies.
Definition IR2Vec.h:151
const_iterator end() const
Definition IR2Vec.h:210
unsigned getNumSections() const
Get number of sections.
Definition IR2Vec.h:176
VocabStorage()
Default constructor creates empty storage (invalid state)
Definition IR2Vec.h:161
VocabStorage & operator=(VocabStorage &&)=delete
VocabStorage & operator=(const VocabStorage &)=delete
unsigned getDimension() const
Get vocabulary dimension.
Definition IR2Vec.h:187
size_t size() const
Get total number of entries across all sections.
Definition IR2Vec.h:173
const_iterator begin() const
Definition IR2Vec.h:209
bool isValid() const
Check if vocabulary is valid (has data)
Definition IR2Vec.h:190
VocabStorage(VocabStorage &&)=default
const std::vector< Embedding > & operator[](unsigned SectionId) const
Section-based access: Storage[sectionId][localIndex].
Definition IR2Vec.h:181
VocabStorage(const VocabStorage &)=delete
Class for storing and accessing the IR2Vec vocabulary.
Definition IR2Vec.h:232
static LLVM_ABI StringRef getVocabKeyForOperandKind(OperandKind Kind)
Function to get vocabulary key for a given OperandKind.
Definition IR2Vec.h:342
LLVM_ABI bool invalidate(Module &M, const PreservedAnalyses &PA, ModuleAnalysisManager::Invalidator &Inv) const
Definition IR2Vec.cpp:406
const_iterator begin() const
Definition IR2Vec.h:401
LLVM_ABI unsigned getDimension() const
Definition IR2Vec.h:324
Vocabulary(Vocabulary &&)=default
static LLVM_ABI OperandKind getOperandKind(const Value *Op)
Function to classify an operand into OperandKind.
Definition IR2Vec.cpp:349
static LLVM_ABI unsigned getIndex(CmpInst::Predicate P)
Definition IR2Vec.h:371
Vocabulary & operator=(const Vocabulary &)=delete
static LLVM_ABI StringRef getStringKey(unsigned Pos)
Returns the string key for a given index position in the vocabulary.
Definition IR2Vec.cpp:388
static constexpr unsigned MaxCanonicalTypeIDs
Definition IR2Vec.h:302
LLVM_ABI const ir2vec::Embedding & operator[](CmpInst::Predicate P) const
Definition IR2Vec.h:393
static constexpr unsigned MaxOperandKinds
Definition IR2Vec.h:304
Vocabulary(const Vocabulary &)=delete
const_iterator cbegin() const
Definition IR2Vec.h:406
OperandKind
Operand kinds supported by IR2Vec Vocabulary.
Definition IR2Vec.h:288
static constexpr size_t getCanonicalSize()
Total number of entries (opcodes + canonicalized types + operand kinds + predicates)
Definition IR2Vec.h:331
static LLVM_ABI unsigned getIndex(const Value &Op)
Definition IR2Vec.h:365
static LLVM_ABI StringRef getVocabKeyForPredicate(CmpInst::Predicate P)
Function to get vocabulary key for a given predicate.
Definition IR2Vec.cpp:378
static constexpr unsigned MaxTypeIDs
Definition IR2Vec.h:301
LLVM_ABI Vocabulary(VocabStorage &&Storage)
Definition IR2Vec.h:312
LLVM_ABI const ir2vec::Embedding & operator[](Type::TypeID TypeID) const
Definition IR2Vec.h:381
static LLVM_ABI unsigned getIndex(Type::TypeID TypeID)
Definition IR2Vec.h:360
const_iterator end() const
Definition IR2Vec.h:408
static LLVM_ABI StringRef getVocabKeyForOpcode(unsigned Opcode)
Function to get vocabulary key for a given Opcode.
Definition IR2Vec.cpp:337
static LLVM_ABI StringRef getVocabKeyForTypeID(Type::TypeID TypeID)
Function to get vocabulary key for a given TypeID.
Definition IR2Vec.h:337
VocabStorage::const_iterator const_iterator
Const Iterator type aliases.
Definition IR2Vec.h:399
const_iterator cend() const
Definition IR2Vec.h:413
static LLVM_ABI unsigned getIndex(unsigned Opcode)
Functions to return flat index.
Definition IR2Vec.h:355
LLVM_ABI bool isValid() const
Definition IR2Vec.h:320
Vocabulary & operator=(Vocabulary &&Other)=delete
LLVM_ABI const ir2vec::Embedding & operator[](unsigned Opcode) const
Accessors to get the embedding for a given entity.
Definition IR2Vec.h:376
static LLVM_ABI VocabStorage createDummyVocabForTest(unsigned Dim=1)
Create a dummy vocabulary for testing purposes.
Definition IR2Vec.cpp:412
static constexpr unsigned MaxPredicateKinds
Definition IR2Vec.h:308
CanonicalTypeID
Canonical type IDs supported by IR2Vec Vocabulary.
Definition IR2Vec.h:271
LLVM_ABI const ir2vec::Embedding & operator[](const Value &Arg) const
Definition IR2Vec.h:387
A Value is a JSON value of unknown type.
Definition JSON.h:290
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
DenseMap< const Instruction *, Embedding > InstEmbeddingsMap
Definition IR2Vec.h:145
LLVM_ABI cl::opt< float > ArgWeight
DenseMap< const BasicBlock *, Embedding > BBEmbeddingsMap
Definition IR2Vec.h:146
LLVM_ABI cl::opt< float > OpcWeight
LLVM_ABI cl::opt< float > TypeWeight
LLVM_ABI cl::opt< IR2VecKind > IR2VecEmbeddingKind
llvm::cl::OptionCategory IR2VecCategory
This is an optimization pass for GlobalISel generic memory operations.
IR2VecKind
IR2Vec computes two kinds of embeddings: Symbolic and Flow-aware.
Definition IR2Vec.h:71
@ Other
Any other memory.
Definition ModRef.h:68
DWARFExpression::Operation Op
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1847
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:870
A CRTP mix-in that provides informational APIs needed for analysis passes.
Definition PassManager.h:93
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition Analysis.h:29
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:70
Embedding is a datatype that wraps std::vector<double>.
Definition IR2Vec.h:87
const_iterator end() const
Definition IR2Vec.h:119
LLVM_ABI bool approximatelyEquals(const Embedding &RHS, double Tolerance=1e-4) const
Returns true if the embedding is approximately equal to the RHS embedding within the specified tolera...
Definition IR2Vec.cpp:132
const_iterator cbegin() const
Definition IR2Vec.h:120
LLVM_ABI Embedding & operator+=(const Embedding &RHS)
Arithmetic operators.
Definition IR2Vec.cpp:87
LLVM_ABI Embedding operator-(const Embedding &RHS) const
Definition IR2Vec.cpp:107
const std::vector< double > & getData() const
Definition IR2Vec.h:123
typename std::vector< double >::const_iterator const_iterator
Definition IR2Vec.h:114
Embedding(size_t Size, double InitialValue)
Definition IR2Vec.h:98
LLVM_ABI Embedding & operator-=(const Embedding &RHS)
Definition IR2Vec.cpp:100
const_iterator cend() const
Definition IR2Vec.h:121
LLVM_ABI Embedding operator*(double Factor) const
Definition IR2Vec.cpp:119
size_t size() const
Definition IR2Vec.h:100
LLVM_ABI Embedding & operator*=(double Factor)
Definition IR2Vec.cpp:113
Embedding(std::initializer_list< double > IL)
Definition IR2Vec.h:95
Embedding(const std::vector< double > &V)
Definition IR2Vec.h:93
LLVM_ABI Embedding operator+(const Embedding &RHS) const
Definition IR2Vec.cpp:94
bool empty() const
Definition IR2Vec.h:101
typename std::vector< double >::iterator iterator
Definition IR2Vec.h:113
LLVM_ABI Embedding & scaleAndAdd(const Embedding &Src, float Factor)
Adds Src Embedding scaled by Factor with the called Embedding.
Definition IR2Vec.cpp:125
Embedding(std::vector< double > &&V)
Definition IR2Vec.h:94
const double & operator[](size_t Itr) const
Definition IR2Vec.h:108
Embedding(size_t Size)
Definition IR2Vec.h:97
LLVM_ABI void print(raw_ostream &OS) const
Definition IR2Vec.cpp:145
const_iterator begin() const
Definition IR2Vec.h:118
double & operator[](size_t Itr)
Definition IR2Vec.h:103