1(* Modified by sweeks@acm.org on 2000-8-24.
2 * Ported to MLton.
3 *)
(* Rebind the top-level name int to Int.int, so that every unqualified int
   appearing in the signatures below denotes that type.
   NOTE(review): presumably introduced by the MLton port mentioned in the
   header comment -- confirm before removing. *)
type int = Int.int
5
6(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi
7 *
8 * $Log$
9 * Revision 1.1  2006/06/23 03:21:27  michaeln
10 * Changed the names of the files in mlyacclib because I want these files
11 * to move into sigobj, and I don't want name-clashes, particularly with
12 * names like stream.sml.  (If you use a parser generated by mlyacc, then
13 * you need to have the files in mlyacclib available too.)
14 *
15 * Revision 1.1  2006/06/22 07:40:27  michaeln
16 * Add a MoscowML compilable implementation of MLyacc, using the MLton sources
17 * as the base.
18 *
19 * Revision 1.1.1.1  1997/01/14 01:38:04  george
20 *   Version 109.24
21 *
22 * Revision 1.1.1.1  1996/01/31  16:01:42  george
23 * Version 109
24 *
25 *)
26
27(* base.sig: Base signature file for SML-Yacc.  This file contains signatures
28   that must be loaded before any of the files produced by ML-Yacc are loaded
29*)
30
31(* STREAM: signature for a lazy stream.*)
32
signature STREAM =
  sig
    (* Abstract stream of elements; the representation is hidden. *)
    type 'xa stream

    (* Turn a producer function (called with unit) into a stream of the
       values it yields. *)
    val streamify : (unit -> '_item) -> '_item stream

    (* Build a stream from a head element and an existing stream. *)
    val cons : '_item * '_item stream -> '_item stream

    (* Take a stream apart into an element and the remaining stream. *)
    val get : '_item stream -> '_item * '_item stream
  end
39
40(* LR_TABLE: signature for an LR Table.
41
42   The list of actions and gotos passed to mkLrTable must be ordered by state
43   number. The values for state 0 are the first in the list, the values for
44    state 1 are next, etc.
45*)
46
signature LR_TABLE =
  sig
    (* List of key/value associations; each PAIR cell holds one key, one
       value and the rest of the list. *)
    datatype ('key, 'value) pairlist =
        EMPTY
      | PAIR of 'key * 'value * ('key, 'value) pairlist

    (* States, terminals and nonterminals are all integers, kept apart by
       their constructors. *)
    datatype state = STATE of int
    datatype term = T of int
    datatype nonterm = NT of int

    (* What the parser may do in a state.  The int carried by REDUCE is
       presumably a rule number (cf. numRules) -- confirm against the
       table implementation. *)
    datatype action =
        SHIFT of state
      | REDUCE of int
      | ACCEPT
      | ERROR

    (* The table itself is abstract; build one with mkLrTable. *)
    type table

    (* Size queries. *)
    val numStates : table -> int
    val numRules : table -> int

    (* Per-state listings.  describeActions returns a (terminal, action)
       list together with one extra action -- presumably the default used
       for terminals not in the list; confirm. *)
    val describeActions :
      table -> state -> (term, action) pairlist * action
    val describeGoto : table -> state -> (nonterm, state) pairlist

    (* Lookups used while parsing. *)
    val action : table -> state * term -> action
    val goto : table -> state * nonterm -> state
    val initialState : table -> state

    (* Carries a state and a nonterminal; presumably raised by goto when no
       entry exists for that pair -- confirm. *)
    exception Goto of state * nonterm

    (* Construct a table.  Both arrays are expected to be ordered by state
       number, starting with state 0. *)
    val mkLrTable :
      { actions : ((term, action) pairlist * action) array,
        gotos : (nonterm, state) pairlist array,
        numStates : int,
        numRules : int,
        initialState : state }
      -> table
  end
74
(* TOKEN: signature revealing the internal structure of a token. This signature
   TOKEN is distinct from the signature {parser name}_TOKENS produced by ML-Yacc.
77   The {parser name}_TOKENS structures contain some types and functions to
78    construct tokens from values and positions.
79
80   The representation of token was very carefully chosen here to allow the
81   polymorphic parser to work without knowing the types of semantic values
82   or line numbers.
83
84   This has had an impact on the TOKENS structure produced by SML-Yacc, which
85   is a structure parameter to lexer functors.  We would like to have some
86   type 'a token which functions to construct tokens would create.  A
   constructor function for an integer token might be
88
89	  INT: int * 'a * 'a -> 'a token.
90
91   This is not possible because we need to have tokens with the representation
92   given below for the polymorphic parser.
93
   Thus our constructor functions for tokens have the form:
95
96	  INT: int * 'a * 'a -> (svalue,'a) token
97
98   This in turn has had an impact on the signature that lexers for SML-Yacc
99   must match and the types that a user must declare in the user declarations
100   section of lexers.
101*)
102
signature TOKEN =
  sig
    structure LrTable : LR_TABLE

    (* A token pairs a terminal with a triple: one component of the first
       parameter type and two of the second (per the surrounding notes,
       a semantic value and two positions). *)
    datatype ('value, 'pos) token =
      TOKEN of LrTable.term * ('value * 'pos * 'pos)

    (* Comparison of two tokens; the exact criterion is defined by the
       implementing structure. *)
    val sameToken : ('value, 'pos) token * ('value, 'pos) token -> bool
  end
109
110(* LR_PARSER: signature for a polymorphic LR parser *)
111
signature LR_PARSER =
  sig
    structure Stream : STREAM
    structure LrTable : LR_TABLE
    structure Token : TOKEN

    (* The parser and the tokens must talk about the same table types. *)
    sharing LrTable = Token.LrTable

    exception ParseError

    (* parse is polymorphic in three types:
         '_sv   occupies the first token parameter (semantic values),
         '_pos  occupies the second token parameter (positions),
         'arg   is an extra value handed through to saction.
       It consumes a token stream and returns a '_sv together with a
       token stream. *)
    val parse :
      { table : LrTable.table,
        lexer : ('_sv, '_pos) Token.token Stream.stream,
        arg : 'arg,
        (* Semantic action function: given an int (presumably a rule
           number -- confirm), a position, the current stack and the
           user argument, it yields a nonterminal, a stack entry and a
           stack. *)
        saction :
          int
          * '_pos
          * (LrTable.state * ('_sv * '_pos * '_pos)) list
          * 'arg
          -> LrTable.nonterm
             * ('_sv * '_pos * '_pos)
             * (LrTable.state * ('_sv * '_pos * '_pos)) list,
        void : '_sv,
        (* Data and callbacks consulted by error correction. *)
        ec :
          { is_keyword : LrTable.term -> bool,
            noShift : LrTable.term -> bool,
            preferred_change : (LrTable.term list * LrTable.term list) list,
            errtermvalue : LrTable.term -> '_sv,
            showTerminal : LrTable.term -> string,
            terms : LrTable.term list,
            error : string * '_pos * '_pos -> unit },
        (* max amount of lookahead used in error correction *)
        lookahead : int }
      -> '_sv * ('_sv, '_pos) Token.token Stream.stream
  end
146
147(* LEXER: a signature that most lexers produced for use with SML-Yacc's
148   output will match.  The user is responsible for declaring type token,
149   type pos, and type svalue in the UserDeclarations section of a lexer.
150
151   Note that type token is abstract in the lexer.  This allows SML-Yacc to
152   create a TOKENS signature for use with lexers produced by ML-Lex that
153   treats the type token abstractly.  Lexers that are functors parametrized by
154   a Tokens structure matching a TOKENS signature cannot examine the structure
155   of tokens.
156*)
157
signature LEXER =
  sig
    (* Types the user supplies in the lexer specification.  token is left
       abstract here. *)
    structure UserDeclarations :
      sig
        type ('sv, 'p) token
        type pos
        type svalue
      end

    (* Given an (int -> string) input function, produce a function that
       returns a token each time it is applied to unit. *)
    val makeLexer :
      (int -> string)
      -> unit
      -> (UserDeclarations.svalue, UserDeclarations.pos)
           UserDeclarations.token
  end
169
170(* ARG_LEXER: the %arg option of ML-Lex allows users to produce lexers which
171   also take an argument before yielding a function from unit to a token
172*)
173
signature ARG_LEXER =
  sig
    (* Types the user supplies in the lexer specification, including the
       type of the extra lexer argument. *)
    structure UserDeclarations :
      sig
        type ('sv, 'p) token
        type pos
        type svalue
        type arg
      end

    (* Like LEXER.makeLexer, except that a UserDeclarations.arg value is
       supplied before the unit -> token function is obtained. *)
    val makeLexer :
      (int -> string)
      -> UserDeclarations.arg
      -> unit
      -> (UserDeclarations.svalue, UserDeclarations.pos)
           UserDeclarations.token
  end
186
187(* PARSER_DATA: the signature of ParserData structures in {parser name}LrValsFun
188   produced by  SML-Yacc.  All such structures match this signature.
189
190   The {parser name}LrValsFun produces a structure which contains all the values
191   except for the lexer needed to call the polymorphic parser mentioned
192   before.
193
194*)
195
signature PARSER_DATA =
  sig
    (* Line-number (position) type. *)
    type pos

    (* Semantic-value type. *)
    type svalue

    (* Type of the argument the user passes to the parser. *)
    type arg

    (* Intended type of the parser's result.  A result is obtained by
       applying Actions.extract to the final semantic value resulting
       from a parse. *)
    type result

    structure LrTable : LR_TABLE
    structure Token : TOKEN
    sharing Token.LrTable = LrTable

    (* Actions holds the functions that maintain the parser's stack of
       semantic values.  void supplies a default value for that stack. *)
    structure Actions :
      sig
        val actions :
          int
          * pos
          * (LrTable.state * (svalue * pos * pos)) list
          * arg
          -> LrTable.nonterm
             * (svalue * pos * pos)
             * (LrTable.state * (svalue * pos * pos)) list
        val void : svalue
        val extract : svalue -> result
      end

    (* EC holds the information an error-correcting parser uses to
       improve error recovery. *)
    structure EC :
      sig
        val is_keyword : LrTable.term -> bool
        val noShift : LrTable.term -> bool
        val preferred_change : (LrTable.term list * LrTable.term list) list
        val errtermvalue : LrTable.term -> svalue
        val showTerminal : LrTable.term -> string
        val terms : LrTable.term list
      end

    (* The LR table for this parser. *)
    val table : LrTable.table
  end
252
253(* signature PARSER is the signature that most user parsers created by
254   SML-Yacc will match.
255*)
256
signature PARSER =
  sig
    structure Token : TOKEN
    structure Stream : STREAM
    exception ParseError

    (* Line-number (position) type. *)
    type pos

    (* Type of the value the parser returns. *)
    type result

    (* Type of the argument the user passes to the parser. *)
    type arg

    (* Type of the entries on the semantic value stack. *)
    type svalue

    (* Create the token stream that the parser reads, from an
       (int -> string) input function. *)
    val makeLexer :
      (int -> string) -> (svalue, pos) Token.token Stream.stream

    (* Run the parser.  The tuple holds an int (presumably the
       error-correction lookahead -- confirm), the token stream, a
       function for printing errors, and the user argument.  The outcome
       is a result paired with the stream of tokens left unused. *)
    val parse :
      int
      * (svalue, pos) Token.token Stream.stream
      * (string * pos * pos -> unit)
      * arg
      -> result * (svalue, pos) Token.token Stream.stream

    (* Comparison of two tokens of this parser. *)
    val sameToken :
      (svalue, pos) Token.token * (svalue, pos) Token.token -> bool
  end
297
298(* signature ARG_PARSER is the signature that will be matched by parsers whose
299    lexer takes an additional argument.
300*)
301
signature ARG_PARSER =
  sig
    structure Token : TOKEN
    structure Stream : STREAM
    exception ParseError

    (* arg is passed to parse; lexarg is passed to makeLexer.  The other
       types play the same roles as their namesakes in PARSER. *)
    type arg
    type lexarg
    type pos
    type result
    type svalue

    (* Create the token stream from an (int -> string) input function and
       the additional lexer argument. *)
    val makeLexer :
      (int -> string)
      -> lexarg
      -> (svalue, pos) Token.token Stream.stream

    (* Run the parser on a token stream; yields a result paired with a
       token stream.  The tuple also carries an int, an error-reporting
       function and the user argument. *)
    val parse :
      int
      * (svalue, pos) Token.token Stream.stream
      * (string * pos * pos -> unit)
      * arg
      -> result * (svalue, pos) Token.token Stream.stream

    (* Comparison of two tokens of this parser. *)
    val sameToken :
      (svalue, pos) Token.token * (svalue, pos) Token.token -> bool
  end
323
324