1353940Sdim//===--------------------- DfaEmitter.h -----------------------------------===// 2353940Sdim// 3353940Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353940Sdim// See https://llvm.org/LICENSE.txt for license information. 5353940Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6353940Sdim// 7353940Sdim//===----------------------------------------------------------------------===// 8353940Sdim// Defines a generic automaton builder. This takes a set of transitions and 9353940Sdim// states that represent a nondeterministic finite state automaton (NFA) and 10353940Sdim// emits a determinized DFA in a form that include/llvm/Support/Automaton.h can 11353940Sdim// drive. 12353940Sdim// 13353940Sdim// See file llvm/TableGen/Automaton.td for the TableGen API definition. 14353940Sdim// 15353940Sdim//===----------------------------------------------------------------------===// 16353940Sdim 17353940Sdim#ifndef LLVM_UTILS_TABLEGEN_DFAEMITTER_H 18353940Sdim#define LLVM_UTILS_TABLEGEN_DFAEMITTER_H 19353940Sdim 20353940Sdim#include "llvm/ADT/StringRef.h" 21353940Sdim#include "llvm/ADT/UniqueVector.h" 22353940Sdim#include "llvm/Support/raw_ostream.h" 23353940Sdim#include "llvm/TableGen/Record.h" 24353940Sdim#include <set> 25353940Sdim#include <unordered_map> 26353940Sdim 27353940Sdimnamespace llvm { 28353940Sdim 29353940Sdimclass raw_ostream; 30353940Sdim/// Construct a deterministic finite state automaton from possible 31353940Sdim/// nondeterministic state and transition data. 32353940Sdim/// 33353940Sdim/// The state type is a 64-bit unsigned integer. The generated automaton is 34353940Sdim/// invariant to the sparsity of the state representation - its size is only 35353940Sdim/// a function of the cardinality of the set of states. 36353940Sdim/// 37353940Sdim/// The inputs to this emitter are considered to define a nondeterministic 38353940Sdim/// finite state automaton (NFA). This is then converted to a DFA during 39353940Sdim/// emission. The emitted tables can be used to by 40353940Sdim/// include/llvm/Support/Automaton.h. 41353940Sdimclass DfaEmitter { 42353940Sdimpublic: 43353940Sdim // The type of an NFA state. The initial state is always zero. 44353940Sdim using state_type = uint64_t; 45353940Sdim // The type of an action. 46353940Sdim using action_type = uint64_t; 47353940Sdim 48353940Sdim DfaEmitter() = default; 49353940Sdim virtual ~DfaEmitter() = default; 50353940Sdim 51353940Sdim void addTransition(state_type From, state_type To, action_type A); 52353940Sdim void emit(StringRef Name, raw_ostream &OS); 53353940Sdim 54353940Sdimprotected: 55353940Sdim /// Emit the C++ type of an action to OS. 56353940Sdim virtual void printActionType(raw_ostream &OS); 57353940Sdim /// Emit the C++ value of an action A to OS. 58353940Sdim virtual void printActionValue(action_type A, raw_ostream &OS); 59353940Sdim 60353940Sdimprivate: 61353940Sdim /// The state type of deterministic states. These are only used internally to 62353940Sdim /// this class. This is an ID into the DfaStates UniqueVector. 63353940Sdim using dfa_state_type = unsigned; 64353940Sdim 65353940Sdim /// The actual representation of a DFA state, which is a union of one or more 66353940Sdim /// NFA states. 67353940Sdim using DfaState = SmallVector<state_type, 4>; 68353940Sdim 69353940Sdim /// A DFA transition consists of a set of NFA states transitioning to a 70353940Sdim /// new set of NFA states. The DfaTransitionInfo tracks, for every 71353940Sdim /// transitioned-from NFA state, a set of valid transitioned-to states. 72353940Sdim /// 73353940Sdim /// Emission of this transition relation allows algorithmic determination of 74353940Sdim /// the possible candidate NFA paths taken under a given input sequence to 75353940Sdim /// reach a given DFA state. 76353940Sdim using DfaTransitionInfo = SmallVector<std::pair<state_type, state_type>, 4>; 77353940Sdim 78353940Sdim /// The set of all possible actions. 79353940Sdim std::set<action_type> Actions; 80353940Sdim 81353940Sdim /// The set of nondeterministic transitions. A state-action pair can 82353940Sdim /// transition to multiple target states. 83353940Sdim std::map<std::pair<state_type, action_type>, std::vector<state_type>> 84353940Sdim NfaTransitions; 85353940Sdim std::set<state_type> NfaStates; 86353940Sdim unsigned NumNfaTransitions = 0; 87353940Sdim 88353940Sdim /// The set of deterministic states. DfaStates.getId(DfaState) returns an ID, 89353940Sdim /// which is dfa_state_type. Note that because UniqueVector reserves state 90353940Sdim /// zero, the initial DFA state is always 1. 91353940Sdim UniqueVector<DfaState> DfaStates; 92353940Sdim /// The set of deterministic transitions. A state-action pair has only a 93353940Sdim /// single target state. 94353940Sdim std::map<std::pair<dfa_state_type, action_type>, 95353940Sdim std::pair<dfa_state_type, DfaTransitionInfo>> 96353940Sdim DfaTransitions; 97353940Sdim 98353940Sdim /// Visit all NFA states and construct the DFA. 99353940Sdim void constructDfa(); 100353940Sdim /// Visit a single DFA state and construct all possible transitions to new DFA 101353940Sdim /// states. 102358399Sdim void visitDfaState(const DfaState &DS); 103353940Sdim}; 104353940Sdim 105353940Sdim} // namespace llvm 106353940Sdim 107353940Sdim#endif 108