1353940Sdim//===--------------------- DfaEmitter.h -----------------------------------===//
2353940Sdim//
3353940Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353940Sdim// See https://llvm.org/LICENSE.txt for license information.
5353940Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6353940Sdim//
7353940Sdim//===----------------------------------------------------------------------===//
8353940Sdim// Defines a generic automaton builder. This takes a set of transitions and
9353940Sdim// states that represent a nondeterministic finite state automaton (NFA) and
10353940Sdim// emits a determinized DFA in a form that include/llvm/Support/Automaton.h can
11353940Sdim// drive.
12353940Sdim//
13353940Sdim// See file llvm/TableGen/Automaton.td for the TableGen API definition.
14353940Sdim//
15353940Sdim//===----------------------------------------------------------------------===//
16353940Sdim
17353940Sdim#ifndef LLVM_UTILS_TABLEGEN_DFAEMITTER_H
18353940Sdim#define LLVM_UTILS_TABLEGEN_DFAEMITTER_H
19353940Sdim
20353940Sdim#include "llvm/ADT/StringRef.h"
21353940Sdim#include "llvm/ADT/UniqueVector.h"
22353940Sdim#include "llvm/Support/raw_ostream.h"
23353940Sdim#include "llvm/TableGen/Record.h"
24353940Sdim#include <set>
25353940Sdim#include <unordered_map>
26353940Sdim
27353940Sdimnamespace llvm {
28353940Sdim
29353940Sdimclass raw_ostream;
30353940Sdim/// Construct a deterministic finite state automaton from possible
31353940Sdim/// nondeterministic state and transition data.
32353940Sdim///
33353940Sdim/// The state type is a 64-bit unsigned integer. The generated automaton is
34353940Sdim/// invariant to the sparsity of the state representation - its size is only
35353940Sdim/// a function of the cardinality of the set of states.
36353940Sdim///
37353940Sdim/// The inputs to this emitter are considered to define a nondeterministic
38353940Sdim/// finite state automaton (NFA). This is then converted to a DFA during
39353940Sdim/// emission. The emitted tables can be used to by
40353940Sdim/// include/llvm/Support/Automaton.h.
41353940Sdimclass DfaEmitter {
42353940Sdimpublic:
43353940Sdim  // The type of an NFA state. The initial state is always zero.
44353940Sdim  using state_type = uint64_t;
45353940Sdim  // The type of an action.
46353940Sdim  using action_type = uint64_t;
47353940Sdim
48353940Sdim  DfaEmitter() = default;
49353940Sdim  virtual ~DfaEmitter() = default;
50353940Sdim
51353940Sdim  void addTransition(state_type From, state_type To, action_type A);
52353940Sdim  void emit(StringRef Name, raw_ostream &OS);
53353940Sdim
54353940Sdimprotected:
55353940Sdim  /// Emit the C++ type of an action to OS.
56353940Sdim  virtual void printActionType(raw_ostream &OS);
57353940Sdim  /// Emit the C++ value of an action A to OS.
58353940Sdim  virtual void printActionValue(action_type A, raw_ostream &OS);
59353940Sdim
60353940Sdimprivate:
61353940Sdim  /// The state type of deterministic states. These are only used internally to
62353940Sdim  /// this class. This is an ID into the DfaStates UniqueVector.
63353940Sdim  using dfa_state_type = unsigned;
64353940Sdim
65353940Sdim  /// The actual representation of a DFA state, which is a union of one or more
66353940Sdim  /// NFA states.
67353940Sdim  using DfaState = SmallVector<state_type, 4>;
68353940Sdim
69353940Sdim  /// A DFA transition consists of a set of NFA states transitioning to a
70353940Sdim  /// new set of NFA states. The DfaTransitionInfo tracks, for every
71353940Sdim  /// transitioned-from NFA state, a set of valid transitioned-to states.
72353940Sdim  ///
73353940Sdim  /// Emission of this transition relation allows algorithmic determination of
74353940Sdim  /// the possible candidate NFA paths taken under a given input sequence to
75353940Sdim  /// reach a given DFA state.
76353940Sdim  using DfaTransitionInfo = SmallVector<std::pair<state_type, state_type>, 4>;
77353940Sdim
78353940Sdim  /// The set of all possible actions.
79353940Sdim  std::set<action_type> Actions;
80353940Sdim
81353940Sdim  /// The set of nondeterministic transitions. A state-action pair can
82353940Sdim  /// transition to multiple target states.
83353940Sdim  std::map<std::pair<state_type, action_type>, std::vector<state_type>>
84353940Sdim      NfaTransitions;
85353940Sdim  std::set<state_type> NfaStates;
86353940Sdim  unsigned NumNfaTransitions = 0;
87353940Sdim
88353940Sdim  /// The set of deterministic states. DfaStates.getId(DfaState) returns an ID,
89353940Sdim  /// which is dfa_state_type. Note that because UniqueVector reserves state
90353940Sdim  /// zero, the initial DFA state is always 1.
91353940Sdim  UniqueVector<DfaState> DfaStates;
92353940Sdim  /// The set of deterministic transitions. A state-action pair has only a
93353940Sdim  /// single target state.
94353940Sdim  std::map<std::pair<dfa_state_type, action_type>,
95353940Sdim           std::pair<dfa_state_type, DfaTransitionInfo>>
96353940Sdim      DfaTransitions;
97353940Sdim
98353940Sdim  /// Visit all NFA states and construct the DFA.
99353940Sdim  void constructDfa();
100353940Sdim  /// Visit a single DFA state and construct all possible transitions to new DFA
101353940Sdim  /// states.
102358399Sdim  void visitDfaState(const DfaState &DS);
103353940Sdim};
104353940Sdim
105353940Sdim} // namespace llvm
106353940Sdim
107353940Sdim#endif
108