LLVM  14.0.0git
Automaton.h
Go to the documentation of this file.
1 //===-- Automaton.h - Support for driving TableGen-produced DFAs ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements class that drive and introspect deterministic finite-
10 // state automata (DFAs) as generated by TableGen's -gen-automata backend.
11 //
12 // For a description of how to define an automaton, see
13 // include/llvm/TableGen/Automaton.td.
14 //
15 // One important detail is that these deterministic automata are created from
16 // (potentially) nondeterministic definitions. Therefore a unique sequence of
17 // input symbols will produce one path through the DFA but multiple paths
18 // through the original NFA. An automaton by default only returns "accepted" or
19 // "not accepted", but frequently we want to analyze what NFA path was taken.
20 // Finding a path through the NFA states that results in a DFA state can help
21 // answer *what* the solution to a problem was, not just that there exists a
22 // solution.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #ifndef LLVM_SUPPORT_AUTOMATON_H
27 #define LLVM_SUPPORT_AUTOMATON_H
28 
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/Support/Allocator.h"
33 #include <deque>
34 #include <map>
35 #include <memory>
36 #include <unordered_map>
37 #include <vector>
38 
39 namespace llvm {
40 
42 
43 /// Forward define the pair type used by the automata transition info tables.
44 ///
45 /// Experimental results with large tables have shown a significant (multiple
46 /// orders of magnitude) parsing speedup by using a custom struct here with a
47 /// trivial constructor rather than std::pair<uint64_t, uint64_t>.
48 struct NfaStatePair {
50 
51  bool operator<(const NfaStatePair &Other) const {
52  return std::make_tuple(FromDfaState, ToDfaState) <
53  std::make_tuple(Other.FromDfaState, Other.ToDfaState);
54  }
55 };
56 
57 namespace internal {
58 /// The internal class that maintains all possible paths through an NFA based
59 /// on a path through the DFA.
61 private:
62  /// Cached transition table. This is a table of NfaStatePairs that contains
63  /// zero-terminated sequences pointed to by DFA transitions.
64  ArrayRef<NfaStatePair> TransitionInfo;
65 
66  /// A simple linked-list of traversed states that can have a shared tail. The
67  /// traversed path is stored in reverse order with the latest state as the
68  /// head.
69  struct PathSegment {
70  uint64_t State;
71  PathSegment *Tail;
72  };
73 
74  /// We allocate segment objects frequently. Allocate them upfront and dispose
75  /// at the end of a traversal rather than hammering the system allocator.
77 
78  /// Heads of each tracked path. These are not ordered.
79  std::deque<PathSegment *> Heads;
80 
81  /// The returned paths. This is populated during getPaths.
83 
84  /// Create a new segment and return it.
85  PathSegment *makePathSegment(uint64_t State, PathSegment *Tail) {
86  PathSegment *P = Allocator.Allocate();
87  *P = {State, Tail};
88  return P;
89  }
90 
91  /// Pairs defines a sequence of possible NFA transitions for a single DFA
92  /// transition.
93  void transition(ArrayRef<NfaStatePair> Pairs) {
94  // Iterate over all existing heads. We will mutate the Heads deque during
95  // iteration.
96  unsigned NumHeads = Heads.size();
97  for (unsigned I = 0; I < NumHeads; ++I) {
98  PathSegment *Head = Heads[I];
99  // The sequence of pairs is sorted. Select the set of pairs that
100  // transition from the current head state.
101  auto PI = lower_bound(Pairs, NfaStatePair{Head->State, 0ULL});
102  auto PE = upper_bound(Pairs, NfaStatePair{Head->State, INT64_MAX});
103  // For every transition from the current head state, add a new path
104  // segment.
105  for (; PI != PE; ++PI)
106  if (PI->FromDfaState == Head->State)
107  Heads.push_back(makePathSegment(PI->ToDfaState, Head));
108  }
109  // Now we've iterated over all the initial heads and added new ones,
110  // dispose of the original heads.
111  Heads.erase(Heads.begin(), std::next(Heads.begin(), NumHeads));
112  }
113 
114 public:
116  : TransitionInfo(TransitionInfo) {
117  reset();
118  }
119 
121  return TransitionInfo;
122  }
123 
124  void reset() {
125  Paths.clear();
126  Heads.clear();
127  Allocator.DestroyAll();
128  // The initial NFA state is 0.
129  Heads.push_back(makePathSegment(0ULL, nullptr));
130  }
131 
132  void transition(unsigned TransitionInfoIdx) {
133  unsigned EndIdx = TransitionInfoIdx;
134  while (TransitionInfo[EndIdx].ToDfaState != 0)
135  ++EndIdx;
136  ArrayRef<NfaStatePair> Pairs(&TransitionInfo[TransitionInfoIdx],
137  EndIdx - TransitionInfoIdx);
138  transition(Pairs);
139  }
140 
142  Paths.clear();
143  for (auto *Head : Heads) {
144  NfaPath P;
145  while (Head->State != 0) {
146  P.push_back(Head->State);
147  Head = Head->Tail;
148  }
149  std::reverse(P.begin(), P.end());
150  Paths.push_back(std::move(P));
151  }
152  return Paths;
153  }
154 };
155 } // namespace internal
156 
157 /// A deterministic finite-state automaton. The automaton is defined in
158 /// TableGen; this object drives an automaton defined by tblgen-emitted tables.
159 ///
160 /// An automaton accepts a sequence of input tokens ("actions"). This class is
161 /// templated on the type of these actions.
162 template <typename ActionT> class Automaton {
163  /// Map from {State, Action} to {NewState, TransitionInfoIdx}.
164  /// TransitionInfoIdx is used by the DfaTranscriber to analyze the transition.
165  /// FIXME: This uses a std::map because ActionT can be a pair type including
166  /// an enum. In particular DenseMapInfo<ActionT> must be defined to use
167  /// DenseMap here.
168  /// This is a shared_ptr to allow very quick copy-construction of Automata; this
169  /// state is immutable after construction so this is safe.
170  using MapTy = std::map<std::pair<uint64_t, ActionT>, std::pair<uint64_t, unsigned>>;
171  std::shared_ptr<MapTy> M;
172  /// An optional transcription object. This uses much more state than simply
173  /// traversing the DFA for acceptance, so is heap allocated.
174  std::shared_ptr<internal::NfaTranscriber> Transcriber;
175  /// The initial DFA state is 1.
176  uint64_t State = 1;
177  /// True if we should transcribe and false if not (even if Transcriber is defined).
178  bool Transcribe;
179 
180 public:
181  /// Create an automaton.
182  /// \param Transitions The Transitions table as created by TableGen. Note that
183  /// because the action type differs per automaton, the
184  /// table type is templated as ArrayRef<InfoT>.
185  /// \param TranscriptionTable The TransitionInfo table as created by TableGen.
186  ///
187  /// Providing the TranscriptionTable argument as non-empty will enable the
188  /// use of transcription, which analyzes the possible paths in the original
189  /// NFA taken by the DFA. NOTE: This is substantially more work than simply
190  /// driving the DFA, so unless you require the getPaths() method leave this
191  /// empty.
192  template <typename InfoT>
194  ArrayRef<NfaStatePair> TranscriptionTable = {}) {
195  if (!TranscriptionTable.empty())
196  Transcriber =
197  std::make_shared<internal::NfaTranscriber>(TranscriptionTable);
198  Transcribe = Transcriber != nullptr;
199  M = std::make_shared<MapTy>();
200  for (const auto &I : Transitions)
201  // Greedily read and cache the transition table.
202  M->emplace(std::make_pair(I.FromDfaState, I.Action),
203  std::make_pair(I.ToDfaState, I.InfoIdx));
204  }
205  Automaton(const Automaton &Other)
206  : M(Other.M), State(Other.State), Transcribe(Other.Transcribe) {
207  // Transcriber is not thread-safe, so create a new instance on copy.
208  if (Other.Transcriber)
209  Transcriber = std::make_shared<internal::NfaTranscriber>(
210  Other.Transcriber->getTransitionInfo());
211  }
212 
213  /// Reset the automaton to its initial state.
214  void reset() {
215  State = 1;
216  if (Transcriber)
217  Transcriber->reset();
218  }
219 
220  /// Enable or disable transcription. Transcription is only available if
221  /// TranscriptionTable was provided to the constructor.
222  void enableTranscription(bool Enable = true) {
223  assert(Transcriber &&
224  "Transcription is only available if TranscriptionTable was provided "
225  "to the Automaton constructor");
226  Transcribe = Enable;
227  }
228 
229  /// Transition the automaton based on input symbol A. Return true if the
230  /// automaton transitioned to a valid state, false if the automaton
231  /// transitioned to an invalid state.
232  ///
233  /// If this function returns false, all methods are undefined until reset() is
234  /// called.
235  bool add(const ActionT &A) {
236  auto I = M->find({State, A});
237  if (I == M->end())
238  return false;
239  if (Transcriber && Transcribe)
240  Transcriber->transition(I->second.second);
241  State = I->second.first;
242  return true;
243  }
244 
245  /// Return true if the automaton can be transitioned based on input symbol A.
246  bool canAdd(const ActionT &A) {
247  auto I = M->find({State, A});
248  return I != M->end();
249  }
250 
251  /// Obtain a set of possible paths through the input nondeterministic
252  /// automaton that could be obtained from the sequence of input actions
253  /// presented to this deterministic automaton.
255  assert(Transcriber && Transcribe &&
256  "Can only obtain NFA paths if transcribing!");
257  return Transcriber->getPaths();
258  }
259 };
260 
261 } // namespace llvm
262 
263 #endif // LLVM_SUPPORT_AUTOMATON_H
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::upper_bound
auto upper_bound(R &&Range, T &&Value)
Provide wrappers to std::upper_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1674
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::internal::NfaTranscriber
The internal class that maintains all possible paths through an NFA based on a path through the DFA.
Definition: Automaton.h:60
llvm::lower_bound
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1661
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::SmallVector< uint64_t, 4 >
Allocator.h
llvm::SpecificBumpPtrAllocator< PathSegment >
llvm::internal::NfaTranscriber::reset
void reset()
Definition: Automaton.h:124
DenseMap.h
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:333
llvm::Automaton::Automaton
Automaton(const Automaton &Other)
Definition: Automaton.h:205
llvm::Automaton::getNfaPaths
ArrayRef< NfaPath > getNfaPaths()
Obtain a set of possible paths through the input nondeterministic automaton that could be obtained fr...
Definition: Automaton.h:254
llvm::internal::NfaTranscriber::getTransitionInfo
ArrayRef< NfaStatePair > getTransitionInfo() const
Definition: Automaton.h:120
llvm::NfaStatePair::FromDfaState
uint64_t FromDfaState
Definition: Automaton.h:49
llvm::internal::NfaTranscriber::transition
void transition(unsigned TransitionInfoIdx)
Definition: Automaton.h:132
llvm::Automaton::reset
void reset()
Reset the automaton to its initial state.
Definition: Automaton.h:214
INT64_MAX
#define INT64_MAX
Definition: DataTypes.h:71
llvm::internal::NfaTranscriber::NfaTranscriber
NfaTranscriber(ArrayRef< NfaStatePair > TransitionInfo)
Definition: Automaton.h:115
uint64_t
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
I
#define I(x, y, z)
Definition: MD5.cpp:59
ArrayRef.h
Enable
@ Enable
Definition: DwarfDebug.cpp:88
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::Automaton::enableTranscription
void enableTranscription(bool Enable=true)
Enable or disable transcription.
Definition: Automaton.h:222
llvm::NfaStatePair
Forward define the pair type used by the automata transition info tables.
Definition: Automaton.h:48
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::CallingConv::Tail
@ Tail
Tail - This calling convention attemps to make calls as fast as possible while guaranteeing that tail...
Definition: CallingConv.h:81
llvm::Automaton::canAdd
bool canAdd(const ActionT &A)
Return true if the automaton can be transitioned based on input symbol A.
Definition: Automaton.h:246
llvm::internal::NfaTranscriber::getPaths
ArrayRef< NfaPath > getPaths()
Definition: Automaton.h:141
llvm::NfaStatePair::ToDfaState
uint64_t ToDfaState
Definition: Automaton.h:49
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
SmallVector.h
Allocator
Basic Register Allocator
Definition: RegAllocBasic.cpp:146
llvm::Automaton
A deterministic finite-state automaton.
Definition: Automaton.h:162
llvm::Automaton::Automaton
Automaton(ArrayRef< InfoT > Transitions, ArrayRef< NfaStatePair > TranscriptionTable={})
Create an automaton.
Definition: Automaton.h:193
llvm::NfaStatePair::operator<
bool operator<(const NfaStatePair &Other) const
Definition: Automaton.h:51
llvm::Automaton::add
bool add(const ActionT &A)
Transition the automaton based on input symbol A.
Definition: Automaton.h:235
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1172