(* Modified by sweeks@acm.org on 2000-8-24.
 * Ported to MLton.
 *)
type int = Int.int

(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi
 *
 * $Log$
 * Revision 1.1  2006/06/23 03:21:27  michaeln
 * Changed the names of the files in mlyacclib because I want these files
 * to move into sigobj, and I don't want name-clashes, particularly with
 * names like stream.sml.  (If you use a parser generated by mlyacc, then
 * you need to have the files in mlyacclib available too.)
 *
 * Revision 1.1  2006/06/22 07:40:27  michaeln
 * Add a MoscowML compilable implementation of MLyacc, using the MLton sources
 * as the base.
 *
 * Revision 1.1.1.1  1997/01/14 01:38:04  george
 *   Version 109.24
 *
 * Revision 1.1.1.1  1996/01/31 16:01:42  george
 * Version 109
 *
 *)

(* base.sig: Base signature file for SML-Yacc.  This file contains the
   signatures that must be loaded before any of the files produced by
   ML-Yacc are loaded.
*)

(* STREAM: signature for a lazy stream. *)

signature STREAM =
  sig
    type 'xa stream

    (* builds a stream from a function of type unit -> element *)
    val streamify : (unit -> '_a) -> '_a stream

    (* combines one element and a stream into a stream *)
    val cons : '_a * '_a stream -> '_a stream

    (* splits a stream into one element and a stream *)
    val get : '_a stream -> '_a * '_a stream
  end

(* LR_TABLE: signature for an LR Table.

   The list of actions and gotos passed to mkLrTable must be ordered by state
   number.  The values for state 0 are the first in the list, the values for
   state 1 are next, etc.
*)

signature LR_TABLE =
  sig
    (* a list of ('a,'b) pairs with its own constructors *)
    datatype ('a,'b) pairlist
      = EMPTY
      | PAIR of 'a * 'b * ('a,'b) pairlist

    (* states, terminals and nonterminals are tagged integers *)
    datatype state = STATE of int
    datatype term = T of int
    datatype nonterm = NT of int

    datatype action
      = SHIFT of state
      | REDUCE of int
      | ACCEPT
      | ERROR

    type table

    val numStates : table -> int
    val numRules : table -> int

    (* per-state views of a table; the lone action paired with the
       pairlist is presumably the state's default action -- confirm
       against the implementation *)
    val describeActions : table -> state -> (term,action) pairlist * action
    val describeGoto : table -> state -> (nonterm,state) pairlist

    val action : table -> state * term -> action
    val goto : table -> state * nonterm -> state
    val initialState : table -> state

    exception Goto of state * nonterm

    (* both arrays must be ordered by state number, as described in the
       comment that precedes this signature *)
    val mkLrTable :
          { actions : ((term,action) pairlist * action) array,
            gotos : (nonterm,state) pairlist array,
            numStates : int,
            numRules : int,
            initialState : state
          } -> table
  end

(* TOKEN: signature revealing the internal structure of a token.  This
   signature TOKEN is distinct from the signature {parser name}_TOKENS
   produced by ML-Yacc.  The {parser name}_TOKENS structures contain some
   types and functions to construct tokens from values and positions.

   The representation of token was very carefully chosen here to allow the
   polymorphic parser to work without knowing the types of semantic values
   or line numbers.

   This has had an impact on the TOKENS structure produced by SML-Yacc, which
   is a structure parameter to lexer functors.  We would like to have some
   type 'a token which functions to construct tokens would create.  A
   constructor function for an integer token might be

          INT: int * 'a * 'a -> 'a token.

   This is not possible because we need to have tokens with the
   representation given below for the polymorphic parser.

   Thus our constructor functions for tokens have the form:

          INT: int * 'a * 'a -> (svalue,'a) token

   This in turn has had an impact on the signature that lexers for SML-Yacc
   must match and the types that a user must declare in the user
   declarations section of lexers.
*)

signature TOKEN =
  sig
    structure LrTable : LR_TABLE

    (* a terminal together with one 'a component and two 'b components *)
    datatype ('a,'b) token = TOKEN of LrTable.term * ('a * 'b * 'b)

    val sameToken : ('a,'b) token * ('a,'b) token -> bool
  end

(* LR_PARSER: signature for a polymorphic LR parser *)

signature LR_PARSER =
  sig
    structure Stream : STREAM
    structure LrTable : LR_TABLE
    structure Token : TOKEN

    sharing LrTable = Token.LrTable

    exception ParseError

    val parse :
          { table : LrTable.table,
            lexer : ('_b,'_c) Token.token Stream.stream,
            arg : 'arg,
            saction : int
                      * '_c
                      * (LrTable.state * ('_b * '_c * '_c)) list
                      * 'arg
                      -> LrTable.nonterm
                         * ('_b * '_c * '_c)
                         * ((LrTable.state * ('_b * '_c * '_c)) list),
            void : '_b,
            ec : { is_keyword : LrTable.term -> bool,
                   noShift : LrTable.term -> bool,
                   preferred_change :
                     (LrTable.term list * LrTable.term list) list,
                   errtermvalue : LrTable.term -> '_b,
                   showTerminal : LrTable.term -> string,
                   terms : LrTable.term list,
                   error : string * '_c * '_c -> unit
                 },
            (* max amount of lookahead used in error correction *)
            lookahead : int
          } -> '_b * (('_b,'_c) Token.token Stream.stream)
  end

(* LEXER: a signature that most lexers produced for use with SML-Yacc's
   output will match.  The user is responsible for declaring type token,
   type pos, and type svalue in the UserDeclarations section of a lexer.

   Note that type token is abstract in the lexer.  This allows SML-Yacc to
   create a TOKENS signature for use with lexers produced by ML-Lex that
   treats the type token abstractly.  Lexers that are functors parametrized
   by a Tokens structure matching a TOKENS signature cannot examine the
   structure of tokens.
*)

signature LEXER =
  sig
    structure UserDeclarations :
      sig
        type ('a,'b) token
        type pos
        type svalue
      end

    val makeLexer :
          (int -> string)
          -> unit
          -> (UserDeclarations.svalue, UserDeclarations.pos)
               UserDeclarations.token
  end

(* ARG_LEXER: the %arg option of ML-Lex allows users to produce lexers which
   also take an argument before yielding a function from unit to a token
*)

signature ARG_LEXER =
  sig
    structure UserDeclarations :
      sig
        type ('a,'b) token
        type pos
        type svalue
        type arg
      end

    val makeLexer :
          (int -> string)
          -> UserDeclarations.arg
          -> unit
          -> (UserDeclarations.svalue, UserDeclarations.pos)
               UserDeclarations.token
  end

(* PARSER_DATA: the signature of ParserData structures in
   {parser name}LrValsFun produced by SML-Yacc.  All such structures match
   this signature.

   The {parser name}LrValsFun produces a structure which contains all the
   values except for the lexer needed to call the polymorphic parser
   mentioned before.
*)

signature PARSER_DATA =
  sig
    (* the type of line numbers *)
    type pos

    (* the type of semantic values *)
    type svalue

    (* the type of the user-supplied argument to the parser *)
    type arg

    (* the intended type of the result of the parser.  This value is
       produced by applying extract from the structure Actions to the
       final semantic value resulting from a parse.
    *)
    type result

    structure LrTable : LR_TABLE
    structure Token : TOKEN
    sharing Token.LrTable = LrTable

    (* structure Actions contains the functions which maintain the
       semantic values stack in the parser.  Void is used to provide
       a default value for the semantic stack.
    *)
    structure Actions :
      sig
        val actions :
              int
              * pos
              * (LrTable.state * (svalue * pos * pos)) list
              * arg
              -> LrTable.nonterm
                 * (svalue * pos * pos)
                 * ((LrTable.state * (svalue * pos * pos)) list)
        val void : svalue
        val extract : svalue -> result
      end

    (* structure EC contains information used to improve error
       recovery in an error-correcting parser *)
    structure EC :
      sig
        val is_keyword : LrTable.term -> bool
        val noShift : LrTable.term -> bool
        val preferred_change : (LrTable.term list * LrTable.term list) list
        val errtermvalue : LrTable.term -> svalue
        val showTerminal : LrTable.term -> string
        val terms : LrTable.term list
      end

    (* table is the LR table for the parser *)
    val table : LrTable.table
  end

(* signature PARSER is the signature that most user parsers created by
   SML-Yacc will match.
*)

signature PARSER =
  sig
    structure Token : TOKEN
    structure Stream : STREAM
    exception ParseError

    (* type pos is the type of line numbers *)
    type pos

    (* type result is the type of the result from the parser *)
    type result

    (* the type of the user-supplied argument to the parser *)
    type arg

    (* type svalue is the type of semantic values for the semantic value
       stack
    *)
    type svalue

    (* val makeLexer is used to create a stream of tokens for the parser *)
    val makeLexer :
          (int -> string) -> (svalue,pos) Token.token Stream.stream

    (* val parse takes a stream of tokens and a function to print
       errors and returns a value of type result and a stream containing
       the unused tokens
    *)
    val parse :
          int
          * ((svalue,pos) Token.token Stream.stream)
          * (string * pos * pos -> unit)
          * arg
          -> result * (svalue,pos) Token.token Stream.stream

    val sameToken :
          (svalue,pos) Token.token * (svalue,pos) Token.token -> bool
  end

(* signature ARG_PARSER is the signature that will be matched by parsers
   whose lexer takes an additional argument.
*)

signature ARG_PARSER =
  sig
    structure Token : TOKEN
    structure Stream : STREAM
    exception ParseError

    type arg
    type lexarg
    type pos
    type result
    type svalue

    (* like PARSER.makeLexer, with an extra lexarg parameter *)
    val makeLexer :
          (int -> string)
          -> lexarg
          -> (svalue,pos) Token.token Stream.stream

    val parse :
          int
          * ((svalue,pos) Token.token Stream.stream)
          * (string * pos * pos -> unit)
          * arg
          -> result * (svalue,pos) Token.token Stream.stream

    val sameToken :
          (svalue,pos) Token.token * (svalue,pos) Token.token -> bool
  end