1(* Modified by sweeks@acm.org on 2000-8-24. 2 * Ported to MLton. 3 *) 4type int = Int.int 5 6(* ML-Yacc Parser Generator (c) 1989, 1991 Andrew W. Appel, David R. Tarditi 7 * 8 * $Log$ 9 * Revision 1.1 2006/06/22 07:40:27 michaeln 10 * Add a MoscowML compilable implementation of MLyacc, using the MLton sources 11 * as the base. 12 * 13 * Revision 1.1.1.1 1998/04/08 18:40:17 george 14 * Version 110.5 15 * 16 * Revision 1.2 1997/05/20 16:23:21 dbm 17 * SML '97 sharing. 18 * 19# Revision 1.1.1.1 1997/01/14 01:38:06 george 20# Version 109.24 21# 22 * Revision 1.2 1996/02/26 15:02:38 george 23 * print no longer overloaded. 24 * use of makestring has been removed and replaced with Int.toString .. 25 * use of IO replaced with TextIO 26 * 27 * Revision 1.1.1.1 1996/01/31 16:01:46 george 28 * Version 109 29 * 30 *) 31 32signature HEADER = 33 sig 34 type pos = int 35 val lineno : pos ref 36 val text : string list ref 37 38 type inputSource 39 val newSource : string * TextIO.instream * TextIO.outstream -> inputSource 40 val error : inputSource -> pos -> string -> unit 41 val warn : inputSource -> pos -> string -> unit 42 val errorOccurred : inputSource -> unit -> bool 43 44 datatype symbol = SYMBOL of string * pos 45 val symbolName : symbol -> string 46 val symbolPos : symbol -> pos 47 val symbolMake : string * int -> symbol 48 49 type ty 50 val tyName : ty -> string 51 val tyMake : string -> ty 52 53 (* associativities: each kind of associativity is assigned a unique 54 integer *) 55 56 datatype prec = LEFT | RIGHT | NONASSOC 57 datatype control = NODEFAULT | VERBOSE | PARSER_NAME of symbol | 58 FUNCTOR of string | START_SYM of symbol | 59 NSHIFT of symbol list | POS of string | PURE | 60 PARSE_ARG of string * string | 61 TOKEN_SIG_INFO of string 62 63 datatype rule = RULE of {lhs : symbol, rhs : symbol list, 64 code : string, prec : symbol option} 65 66 datatype declData = DECL of 67 {eop : symbol list, 68 keyword : symbol list, 69 nonterm : (symbol * ty option) list option, 70 prec : (prec * (symbol list)) list, 71 change: (symbol list * symbol list) list, 72 term : (symbol * ty option) list option, 73 control : control list, 74 value : (symbol * string) list} 75 76 val join_decls : declData * declData * inputSource * pos -> declData 77 78 type parseResult 79 val getResult : parseResult -> string * declData * rule list 80 end; 81 82signature PARSE_GEN_PARSER = 83 sig 84 structure Header : HEADER 85 val parse : string -> Header.parseResult * Header.inputSource 86 end; 87 88signature PARSE_GEN = 89 sig 90 val parseGen : string -> unit 91 end; 92 93signature GRAMMAR = 94 sig 95 96 datatype term = T of int 97 datatype nonterm = NT of int 98 datatype symbol = TERM of term | NONTERM of nonterm 99 100 (* grammar: 101 terminals should be numbered from 0 to terms-1, 102 nonterminals should be numbered from 0 to nonterms-1, 103 rules should be numbered between 0 and (length rules) - 1, 104 higher precedence binds tighter, 105 start nonterminal should not occur on the rhs of any rule 106 *) 107 108 datatype grammar = GRAMMAR of 109 {rules: {lhs : nonterm, rhs : symbol list, 110 precedence : int option, rulenum : int } list, 111 terms: int, 112 nonterms: int, 113 start : nonterm, 114 eop : term list, 115 noshift : term list, 116 precedence : term -> int option, 117 termToString : term -> string, 118 nontermToString : nonterm -> string} 119 end 120 121(* signature for internal version of grammar *) 122 123signature INTGRAMMAR = 124 sig 125 structure Grammar : GRAMMAR 126 structure SymbolAssoc : TABLE 127 structure NontermAssoc : TABLE 128 129 sharing type SymbolAssoc.key = Grammar.symbol 130 sharing type NontermAssoc.key = Grammar.nonterm 131 132 datatype rule = RULE of 133 {lhs : Grammar.nonterm, 134 rhs : Grammar.symbol list, 135 136 (* internal number of rule - convenient for producing LR graph *) 137 138 num : int, 139 rulenum : int, 140 precedence : int option} 141 142 val gtTerm : Grammar.term * Grammar.term -> bool 143 val eqTerm : Grammar.term * Grammar.term -> bool 144 145 val gtNonterm : Grammar.nonterm * Grammar.nonterm -> bool 146 val eqNonterm : Grammar.nonterm * Grammar.nonterm -> bool 147 148 val gtSymbol : Grammar.symbol * Grammar.symbol -> bool 149 val eqSymbol : Grammar.symbol * Grammar.symbol -> bool 150 151 (* Debugging information will be generated only if DEBUG is true. *) 152 153 val DEBUG : bool 154 155 val prRule : (Grammar.symbol -> string) * (Grammar.nonterm -> string) * 156 (string -> 'b) -> rule -> unit 157 val prGrammar : (Grammar.symbol -> string)*(Grammar.nonterm -> string) * 158 (string -> unit) -> Grammar.grammar -> unit 159 end 160 161signature CORE = 162 sig 163 structure Grammar : GRAMMAR 164 structure IntGrammar : INTGRAMMAR 165 sharing Grammar = IntGrammar.Grammar 166 167 datatype item = ITEM of 168 { rule : IntGrammar.rule, 169 dot : int, 170 171(* rhsAfter: The portion of the rhs of a rule that lies after the dot *) 172 173 rhsAfter: Grammar.symbol list } 174 175(* eqItem and gtItem compare items *) 176 177 val eqItem : item * item -> bool 178 val gtItem : item * item -> bool 179 180(* functions for maintaining ordered item lists *) 181 182 val insert : item * item list -> item list 183 val union : item list * item list -> item list 184 185(* core: a set of items. It is represented by an ordered list of items. 186 The list is in ascending order The rule numbers and the positions of the 187 dots are used to order the items. *) 188 189 datatype core = CORE of item list * int (* state # *) 190 191(* gtCore and eqCore compare the lists of items *) 192 193 val gtCore : core * core -> bool 194 val eqCore : core * core -> bool 195 196(* functions for debugging *) 197 198 val prItem : (Grammar.symbol -> string) * (Grammar.nonterm -> string) * 199 (string -> unit) -> item -> unit 200 val prCore : (Grammar.symbol -> string) * (Grammar.nonterm -> string) * 201 (string -> unit) -> core -> unit 202end 203 204signature CORE_UTILS = 205 sig 206 207 structure Grammar : GRAMMAR 208 structure IntGrammar : INTGRAMMAR 209 structure Core : CORE 210 211 sharing Grammar = IntGrammar.Grammar = Core.Grammar 212 sharing IntGrammar = Core.IntGrammar 213 214(* mkFuncs: create functions for the set of productions derived from a 215 nonterminal, the cores that result from shift/gotos from a core, 216 and return a list of rules *) 217 218 val mkFuncs : Grammar.grammar -> 219 { produces : Grammar.nonterm -> IntGrammar.rule list, 220 221(* shifts: take a core and compute all the cores that result from shifts/gotos 222 on symbols *) 223 224 shifts : Core.core -> (Grammar.symbol*Core.item list) list, 225 rules: IntGrammar.rule list, 226 227(* epsProds: take a core compute epsilon productions for it *) 228 229 epsProds : Core.core -> IntGrammar.rule list} 230 end 231 232signature LRGRAPH = 233 sig 234 structure Grammar : GRAMMAR 235 structure IntGrammar : INTGRAMMAR 236 structure Core : CORE 237 238 sharing Grammar = IntGrammar.Grammar = Core.Grammar 239 sharing IntGrammar = Core.IntGrammar 240 241 type graph 242 val edges : Core.core * graph -> {edge:Grammar.symbol,to:Core.core} list 243 val nodes : graph -> Core.core list 244 val shift : graph -> int * Grammar.symbol -> int (* int = state # *) 245 val core : graph -> int -> Core.core (* get core for a state *) 246 247(* mkGraph: compute the LR(0) sets of items *) 248 249 val mkGraph : Grammar.grammar -> 250 {graph : graph, 251 produces : Grammar.nonterm -> IntGrammar.rule list, 252 rules : IntGrammar.rule list, 253 epsProds: Core.core -> IntGrammar.rule list} 254 255 val prGraph: (Grammar.symbol -> string)*(Grammar.nonterm -> string) * 256 (string -> unit) -> graph -> unit 257 end 258 259signature LOOK = 260 sig 261 structure Grammar : GRAMMAR 262 structure IntGrammar : INTGRAMMAR 263 sharing Grammar = IntGrammar.Grammar 264 265 val union : Grammar.term list * Grammar.term list -> Grammar.term list 266 val make_set : Grammar.term list -> Grammar.term list 267 268 val mkFuncs : {rules : IntGrammar.rule list, nonterms : int, 269 produces : Grammar.nonterm -> IntGrammar.rule list} -> 270 {nullable: Grammar.nonterm -> bool, 271 first : Grammar.symbol list -> Grammar.term list} 272 273 val prLook : (Grammar.term -> string) * (string -> unit) -> 274 Grammar.term list -> unit 275 end 276 277signature LALR_GRAPH = 278 sig 279 structure Grammar : GRAMMAR 280 structure IntGrammar : INTGRAMMAR 281 structure Core : CORE 282 structure Graph : LRGRAPH 283 284 sharing Grammar = IntGrammar.Grammar = Core.Grammar = Graph.Grammar 285 sharing IntGrammar = Core.IntGrammar = Graph.IntGrammar 286 sharing Core = Graph.Core 287 288 datatype lcore = LCORE of (Core.item * Grammar.term list) list * int 289 val addLookahead : {graph : Graph.graph, 290 first : Grammar.symbol list -> Grammar.term list, 291 eop : Grammar.term list, 292 nonterms : int, 293 nullable: Grammar.nonterm -> bool, 294 produces : Grammar.nonterm -> IntGrammar.rule list, 295 rules : IntGrammar.rule list, 296 epsProds : Core.core -> IntGrammar.rule list, 297 print : string -> unit, (* for debugging *) 298 termToString : Grammar.term -> string, 299 nontermToString : Grammar.nonterm -> string} -> 300 lcore list 301 val prLcore : (Grammar.symbol -> string) * (Grammar.nonterm -> string) * 302 (Grammar.term -> string) * (string -> unit) -> 303 lcore -> unit 304 end 305 306(* LR_ERRS: errors found while constructing an LR table *) 307 308signature LR_ERRS = 309 sig 310 structure LrTable : LR_TABLE 311 312 (* RR = reduce/reduce, 313 SR = shift/reduce 314 NS: non-shiftable terminal found on the rhs of a rule 315 NOT_REDUCED n: rule number n was not reduced 316 START n : start symbol found on the rhs of rule n *) 317 318 datatype err = RR of LrTable.term * LrTable.state * int * int 319 | SR of LrTable.term * LrTable.state * int 320 | NS of LrTable.term * int 321 | NOT_REDUCED of int 322 | START of int 323 324 val summary : err list -> {rr : int, sr: int, 325 not_reduced : int, start : int,nonshift : int} 326 327 val printSummary : (string -> unit) -> err list -> unit 328 329 end 330 331(* PRINT_STRUCT: prints a structure which includes a value 'table' and a 332 structure Table whose signature matches LR_TABLE. The table in the printed 333 structure will contain the same information as the one passed to 334 printStruct, although the representation may be different. It returns 335 the number of entries left in the table after compaction.*) 336 337signature PRINT_STRUCT = 338 sig 339 structure LrTable : LR_TABLE 340 val makeStruct : 341 {table : LrTable.table, 342 name : string, 343 print: string -> unit, 344 verbose : bool 345 } -> int 346 end 347 348(* VERBOSE: signature for a structure which takes a table and creates a 349 verbose description of it *) 350 351signature VERBOSE = 352 sig 353 structure Errs : LR_ERRS 354 val printVerbose : 355 {table : Errs.LrTable.table, 356 entries : int, 357 termToString : Errs.LrTable.term -> string, 358 nontermToString : Errs.LrTable.nonterm -> string, 359 stateErrs : Errs.LrTable.state -> Errs.err list, 360 errs : Errs.err list, 361 print: string -> unit, 362 printCores : (string -> unit) -> Errs.LrTable.state -> unit, 363 printRule : (string -> unit) -> int -> unit} -> unit 364 end 365 366(* MAKE_LR_TABLE: signature for a structure which includes a structure 367 matching the signature LR_TABLE and a function which maps grammars 368 to tables *) 369 370signature MAKE_LR_TABLE = 371 sig 372 structure Grammar : GRAMMAR 373 structure Errs : LR_ERRS 374 structure LrTable : LR_TABLE 375 sharing Errs.LrTable = LrTable 376 377 sharing type LrTable.term = Grammar.term 378 sharing type LrTable.nonterm = Grammar.nonterm 379 380 (* boolean value determines whether default reductions will be used. 381 If it is true, reductions will be used. *) 382 383 val mkTable : Grammar.grammar * bool -> 384 LrTable.table * 385 (LrTable.state -> Errs.err list) * (* errors in a state *) 386 ((string -> unit) -> LrTable.state -> unit) * 387 Errs.err list (* list of all errors *) 388 end; 389 390(* SHRINK_LR_TABLE: finds unique action entry rows in the action table 391 for the LR parser *) 392 393signature SHRINK_LR_TABLE = 394 sig 395 (* Takes an action table represented as a list of action rows. 396 It returns the number of unique rows left in the action table, 397 a list of integers which maps each original row to a unique 398 row, and a list of unique rows *) 399 structure LrTable : LR_TABLE 400 val shrinkActionList : LrTable.table * bool -> 401 (int * int list * 402 ((LrTable.term,LrTable.action) LrTable.pairlist * 403 LrTable.action) list) * int 404 end 405