1(* Modified by sweeks@acm.org on 2000-8-24.
2 * Ported to MLton.
3 *)
4type int = Int.int
5
6(* ML-Yacc Parser Generator (c) 1989, 1991 Andrew W. Appel, David R. Tarditi
7 *
8 * $Log$
9 * Revision 1.1  2006/06/22 07:40:27  michaeln
10 * Add a MoscowML compilable implementation of MLyacc, using the MLton sources
11 * as the base.
12 *
13 * Revision 1.1.1.1  1998/04/08 18:40:17  george
14 * Version 110.5
15 *
16 * Revision 1.2  1997/05/20 16:23:21  dbm
17 *   SML '97 sharing.
18 *
19# Revision 1.1.1.1  1997/01/14  01:38:06  george
20#   Version 109.24
21#
22 * Revision 1.2  1996/02/26  15:02:38  george
23 *    print no longer overloaded.
24 *    use of makestring has been removed and replaced with Int.toString ..
25 *    use of IO replaced with TextIO
26 *
27 * Revision 1.1.1.1  1996/01/31  16:01:46  george
28 * Version 109
29 *
30 *)
31
32signature HEADER =
33  sig
34    type pos = int
35    val lineno : pos ref
36    val text : string list ref
37
38    type inputSource
39    val newSource : string * TextIO.instream * TextIO.outstream -> inputSource
40    val error : inputSource -> pos -> string -> unit
41    val warn : inputSource -> pos -> string -> unit
42    val errorOccurred : inputSource -> unit -> bool
43
44    datatype symbol = SYMBOL of string * pos
45    val symbolName : symbol -> string
46    val symbolPos : symbol -> pos
47    val symbolMake : string * int -> symbol
48
49    type ty
50    val tyName : ty -> string
51    val tyMake : string -> ty
52
53    (* associativities: each kind of associativity is assigned a unique
54       integer *)
55
56    datatype prec = LEFT | RIGHT | NONASSOC
57    datatype control = NODEFAULT | VERBOSE | PARSER_NAME of symbol |
58	               FUNCTOR of string  | START_SYM of symbol |
59		       NSHIFT of symbol list | POS of string | PURE |
60		       PARSE_ARG of string * string |
61		       TOKEN_SIG_INFO of string
62
63    datatype rule = RULE of {lhs : symbol, rhs : symbol list,
64		             code : string, prec : symbol option}
65
66    datatype declData = DECL of
67			{eop : symbol list,
68			 keyword : symbol list,
69			 nonterm : (symbol * ty option) list option,
70			 prec : (prec * (symbol list)) list,
71			 change: (symbol list * symbol list) list,
72			 term : (symbol * ty option) list option,
73			 control : control list,
74			 value : (symbol * string) list}
75
76     val join_decls : declData * declData * inputSource * pos -> declData
77
78     type parseResult
79     val getResult : parseResult -> string * declData * rule list
80  end;
81
82signature PARSE_GEN_PARSER =
83  sig
84    structure Header : HEADER
85    val parse : string -> Header.parseResult * Header.inputSource
86  end;
87
88signature PARSE_GEN =
89  sig
90    val parseGen : string -> unit
91  end;
92
93signature GRAMMAR =
94    sig
95
96	datatype term = T of int
97	datatype nonterm = NT of int
98	datatype symbol = TERM of term | NONTERM of nonterm
99
100	(* grammar:
101	     terminals should be numbered from 0 to terms-1,
102	     nonterminals should be numbered from 0 to nonterms-1,
103	     rules should be numbered between 0 and (length rules) - 1,
104	     higher precedence binds tighter,
105	     start nonterminal should not occur on the rhs of any rule
106	*)
107
108	datatype grammar = GRAMMAR of
109			{rules: {lhs : nonterm, rhs : symbol list,
110				 precedence : int option, rulenum : int } list,
111			terms: int,
112			nonterms: int,
113			start : nonterm,
114			eop : term list,
115			noshift : term list,
116			precedence : term -> int option,
117			termToString : term -> string,
118			nontermToString : nonterm -> string}
119   end
120
121(* signature for internal version of grammar *)
122
123signature INTGRAMMAR =
124    sig
125	structure Grammar  : GRAMMAR
126	structure SymbolAssoc : TABLE
127	structure NontermAssoc : TABLE
128
129	sharing type SymbolAssoc.key = Grammar.symbol
130	sharing type NontermAssoc.key = Grammar.nonterm
131
132	datatype rule = RULE of
133		{lhs : Grammar.nonterm,
134		 rhs : Grammar.symbol list,
135
136	(* internal number of rule - convenient for producing LR graph *)
137
138		 num : int,
139		 rulenum : int,
140		 precedence : int option}
141
142	val gtTerm : Grammar.term * Grammar.term -> bool
143	val eqTerm : Grammar.term * Grammar.term -> bool
144
145	val gtNonterm : Grammar.nonterm * Grammar.nonterm -> bool
146	val eqNonterm : Grammar.nonterm * Grammar.nonterm -> bool
147
148	val gtSymbol : Grammar.symbol * Grammar.symbol -> bool
149	val eqSymbol : Grammar.symbol * Grammar.symbol -> bool
150
151	(* Debugging information will be generated only if DEBUG is true. *)
152
153	val DEBUG : bool
154
155	val prRule : (Grammar.symbol -> string) * (Grammar.nonterm -> string) *
156				(string -> 'b) -> rule -> unit
157	val prGrammar : (Grammar.symbol -> string)*(Grammar.nonterm -> string) *
158				(string -> unit) -> Grammar.grammar -> unit
159    end
160
161signature CORE =
162    sig
163	structure Grammar : GRAMMAR
164	structure IntGrammar : INTGRAMMAR
165	sharing Grammar = IntGrammar.Grammar
166
167	datatype item = ITEM of
168			{ rule : IntGrammar.rule,
169			  dot : int,
170
171(* rhsAfter: The portion of the rhs of a rule that lies after the dot *)
172
173			  rhsAfter: Grammar.symbol list }
174
175(* eqItem and gtItem compare items *)
176
177	val eqItem : item * item -> bool
178	val gtItem : item * item -> bool
179
180(* functions for maintaining ordered item lists *)
181
182	val insert : item * item list -> item list
183	val union : item list * item list -> item list
184
185(* core:  a set of items.  It is represented by an ordered list of items.
186   The list is in ascending order The rule numbers and the positions of the
187   dots are used to order the items. *)
188
189	datatype core = CORE of item list * int (* state # *)
190
191(* gtCore and eqCore compare the lists of items *)
192
193	val gtCore : core * core -> bool
194	val eqCore : core * core -> bool
195
196(* functions for debugging *)
197
198	val prItem : (Grammar.symbol -> string) * (Grammar.nonterm -> string) *
199				(string -> unit) -> item -> unit
200	val prCore : (Grammar.symbol -> string) * (Grammar.nonterm -> string) *
201				(string -> unit) -> core -> unit
202end
203
204signature CORE_UTILS =
205    sig
206
207	structure Grammar : GRAMMAR
208	structure IntGrammar : INTGRAMMAR
209	structure Core : CORE
210
211	sharing Grammar = IntGrammar.Grammar = Core.Grammar
212	sharing IntGrammar = Core.IntGrammar
213
214(* mkFuncs: create functions for the set of productions derived from a
215   nonterminal, the cores that result from shift/gotos from a core,
216    and return a list of rules *)
217
218	val mkFuncs : Grammar.grammar ->
219		{ produces : Grammar.nonterm -> IntGrammar.rule list,
220
221(* shifts: take a core and compute all the cores that result from shifts/gotos
222   on symbols *)
223
224		  shifts : Core.core -> (Grammar.symbol*Core.item list) list,
225		  rules: IntGrammar.rule list,
226
227(* epsProds: take a core compute epsilon productions for it *)
228
229		  epsProds : Core.core -> IntGrammar.rule list}
230	end
231
232signature LRGRAPH =
233    sig
234	structure Grammar : GRAMMAR
235	structure IntGrammar : INTGRAMMAR
236	structure Core : CORE
237
238	sharing Grammar = IntGrammar.Grammar = Core.Grammar
239	sharing IntGrammar = Core.IntGrammar
240
241	type graph
242	val edges : Core.core * graph -> {edge:Grammar.symbol,to:Core.core} list
243	val nodes : graph -> Core.core list
244	val shift : graph -> int * Grammar.symbol -> int (* int = state # *)
245	val core : graph -> int -> Core.core (* get core for a state *)
246
247(* mkGraph: compute the LR(0) sets of items *)
248
249	val mkGraph :  Grammar.grammar ->
250			 {graph : graph,
251			  produces : Grammar.nonterm -> IntGrammar.rule list,
252			  rules : IntGrammar.rule list,
253			  epsProds: Core.core -> IntGrammar.rule list}
254
255	val prGraph: (Grammar.symbol -> string)*(Grammar.nonterm -> string) *
256				(string -> unit) -> graph -> unit
257    end
258
259signature LOOK =
260    sig
261 	structure Grammar : GRAMMAR
262	structure IntGrammar : INTGRAMMAR
263	sharing Grammar = IntGrammar.Grammar
264
265	val union : Grammar.term list * Grammar.term list -> Grammar.term list
266	val make_set : Grammar.term list -> Grammar.term list
267
268	val mkFuncs :  {rules : IntGrammar.rule list, nonterms : int,
269			produces : Grammar.nonterm -> IntGrammar.rule list} ->
270		 	    {nullable: Grammar.nonterm -> bool,
271			     first : Grammar.symbol list -> Grammar.term list}
272
273	val prLook : (Grammar.term -> string) * (string -> unit) ->
274			Grammar.term list -> unit
275   end
276
277signature LALR_GRAPH =
278    sig
279	structure Grammar : GRAMMAR
280	structure IntGrammar : INTGRAMMAR
281	structure Core : CORE
282	structure Graph : LRGRAPH
283
284	sharing Grammar = IntGrammar.Grammar = Core.Grammar = Graph.Grammar
285	sharing IntGrammar = Core.IntGrammar = Graph.IntGrammar
286	sharing Core = Graph.Core
287
288	datatype lcore = LCORE of (Core.item * Grammar.term list) list * int
289	val addLookahead : {graph : Graph.graph,
290			    first : Grammar.symbol list -> Grammar.term list,
291			    eop : Grammar.term list,
292			    nonterms : int,
293			    nullable: Grammar.nonterm -> bool,
294			    produces : Grammar.nonterm -> IntGrammar.rule list,
295			    rules : IntGrammar.rule list,
296			    epsProds : Core.core -> IntGrammar.rule list,
297			    print : string -> unit,  (* for debugging *)
298			    termToString : Grammar.term -> string,
299			    nontermToString : Grammar.nonterm -> string} ->
300				lcore list
301	val prLcore : (Grammar.symbol -> string) * (Grammar.nonterm -> string) *
302		      (Grammar.term -> string) * (string -> unit) ->
303					 lcore -> unit
304    end
305
306(* LR_ERRS: errors found while constructing an LR table *)
307
308signature LR_ERRS =
309  sig
310    structure LrTable : LR_TABLE
311
312    (* RR = reduce/reduce,
313       SR = shift/reduce
314       NS: non-shiftable terminal found on the rhs of a rule
315       NOT_REDUCED n: rule number n was not reduced
316       START n : start symbol found on the rhs of rule n *)
317
318    datatype err = RR of LrTable.term * LrTable.state * int * int
319		 | SR of LrTable.term * LrTable.state * int
320		 | NS of LrTable.term * int
321		 | NOT_REDUCED of int
322	         | START of int
323
324     val summary : err list -> {rr : int, sr: int,
325			    not_reduced : int, start : int,nonshift : int}
326
327     val printSummary : (string -> unit) -> err list -> unit
328
329  end
330
331(* PRINT_STRUCT: prints a structure which includes a value 'table' and a
332   structure Table whose signature matches LR_TABLE.  The table in the printed
333   structure will contain the same information as the one passed to
334   printStruct, although the representation may be different.  It returns
335   the number of entries left in the table after compaction.*)
336
337signature PRINT_STRUCT =
338  sig
339	structure LrTable : LR_TABLE
340	val makeStruct :
341		{table : LrTable.table,
342		 name : string,
343		 print: string -> unit,
344                 verbose : bool
345		} -> int
346  end
347
348(* VERBOSE: signature for a structure which takes a table and creates a
349   verbose description of it *)
350
351signature VERBOSE =
352  sig
353	structure Errs : LR_ERRS
354	val printVerbose :
355		{table : Errs.LrTable.table,
356                 entries : int,
357	         termToString : Errs.LrTable.term -> string,
358	         nontermToString : Errs.LrTable.nonterm -> string,
359		 stateErrs : Errs.LrTable.state -> Errs.err list,
360		 errs : Errs.err list,
361		 print: string -> unit,
362		 printCores : (string -> unit) -> Errs.LrTable.state -> unit,
363		 printRule : (string -> unit) -> int -> unit} -> unit
364  end
365
366(* MAKE_LR_TABLE: signature for a structure which includes a structure
367   matching the signature LR_TABLE and a function which maps grammars
368   to tables *)
369
370signature MAKE_LR_TABLE =
371   sig
372	structure Grammar : GRAMMAR
373	structure Errs : LR_ERRS
374	structure LrTable : LR_TABLE
375	sharing Errs.LrTable = LrTable
376
377	sharing type LrTable.term = Grammar.term
378	sharing type LrTable.nonterm = Grammar.nonterm
379
380	(* boolean value determines whether default reductions will be used.
381	   If it is true, reductions will be used. *)
382
383	val mkTable : Grammar.grammar * bool ->
384	       LrTable.table *
385	      (LrTable.state -> Errs.err list) *   (* errors in a state *)
386	      ((string -> unit) -> LrTable.state -> unit) *
387	       Errs.err list	(* list of all errors *)
388   end;
389
390(* SHRINK_LR_TABLE: finds unique action entry rows in the  action table
391   for the LR parser *)
392
393signature SHRINK_LR_TABLE =
394   sig
395       (* Takes an action table represented as a list of action rows.
396          It returns the number of unique rows left in the action table,
397          a list of integers which maps each original row to a unique
398          row, and a list of unique rows *)
399       structure LrTable : LR_TABLE
400       val shrinkActionList : LrTable.table * bool ->
401	        (int * int list *
402	           ((LrTable.term,LrTable.action) LrTable.pairlist *
403		    LrTable.action) list) * int
404    end
405