1(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi
2 *
3 * $Log$
4 * Revision 1.1  2006/06/23 03:21:27  michaeln
5 * Changed the names of the files in mlyacclib because I want these files
6 * to move into sigobj, and I don't want name-clashes, particularly with
7 * names like stream.sml.  (If you use a parser generated by mlyacc, then
8 * you need to have the files in mlyacclib available too.)
9 *
10 * Revision 1.1  2006/06/22 07:40:27  michaeln
11 * Add a MoscowML compilable implementation of MLyacc, using the MLton sources
12 * as the base.
13 *
14 * Revision 1.2  1997/08/26 19:18:54  jhr
15 *   Replaced used of "abstraction" with ":>".
16 *
17# Revision 1.1.1.1  1997/01/14  01:38:04  george
18#   Version 109.24
19#
20 * Revision 1.3  1996/10/03  03:36:58  jhr
21 * Qualified identifiers that are no-longer top-level (quot, rem, min, max).
22 *
23 * Revision 1.2  1996/02/26  15:02:29  george
24 *    print no longer overloaded.
25 *    use of makestring has been removed and replaced with Int.toString ..
26 *    use of IO replaced with TextIO
27 *
28 * Revision 1.1.1.1  1996/01/31  16:01:42  george
29 * Version 109
30 *
31 *)
32
33(* parser.sml:  This is a parser driver for LR tables with an error-recovery
34   routine added to it.  The routine used is described in detail in this
35   article:
36
37	'A Practical Method for LR and LL Syntactic Error Diagnosis and
38	 Recovery', by M. Burke and G. Fisher, ACM Transactions on
39	 Programming Langauges and Systems, Vol. 9, No. 2, April 1987,
40	 pp. 164-197.
41
42    This program is an implementation is the partial, deferred method discussed
43    in the article.  The algorithm and data structures used in the program
44    are described below.
45
46    This program assumes that all semantic actions are delayed.  A semantic
47    action should produce a function from unit -> value instead of producing the
48    normal value.  The parser returns the semantic value on the top of the
49    stack when accept is encountered.  The user can deconstruct this value
50    and apply the unit -> value function in it to get the answer.
51
52    It also assumes that the lexer is a lazy stream.
53
54    Data Structures:
55    ----------------
56
57	* The parser:
58
59	   The state stack has the type
60
61		 (state * (semantic value * line # * line #)) list
62
63	   The parser keeps a queue of (state stack * lexer pair).  A lexer pair
64	 consists of a terminal * value pair and a lexer.  This allows the
65	 parser to reconstruct the states for terminals to the left of a
66	 syntax error, and attempt to make error corrections there.
67
68	   The queue consists of a pair of lists (x,y).  New additions to
69	 the queue are cons'ed onto y.  The first element of x is the top
70	 of the queue.  If x is nil, then y is reversed and used
71	 in place of x.
72
73    Algorithm:
74    ----------
75
76	* The steady-state parser:
77
78	    This parser keeps the length of the queue of state stacks at
79	a steady state by always removing an element from the front when
80	another element is placed on the end.
81
82	    It has these arguments:
83
84	   stack: current stack
85	   queue: value of the queue
86	   lexPair ((terminal,value),lex stream)
87
88	When SHIFT is encountered, the state to shift to and the value are
89	are pushed onto the state stack.  The state stack and lexPair are
90	placed on the queue.  The front element of the queue is removed.
91
92	When REDUCTION is encountered, the rule is applied to the current
93	stack to yield a triple (nonterm,value,new stack).  A new
94	stack is formed by adding (goto(top state of stack,nonterm),value)
95	to the stack.
96
97	When ACCEPT is encountered, the top value from the stack and the
98	lexer are returned.
99
100	When an ERROR is encountered, fixError is called.  FixError
101	takes the arguments to the parser, fixes the error if possible and
102        returns a new set of arguments.
103
104	* The distance-parser:
105
106	This parser includes an additional argument distance.  It pushes
107	elements on the queue until it has parsed distance tokens, or an
108	ACCEPT or ERROR occurs.  It returns a stack, lexer, the number of
109	tokens left unparsed, a queue, and an action option.
110*)
111
112signature FIFO =
113  sig type 'a queue
114      val empty : 'a queue
115      exception Empty
116      val get : 'a queue -> 'a * 'a queue
117      val put : 'a * 'a queue -> 'a queue
118  end
119
120(* drt (12/15/89) -- the functor should be used in development work, but
121   it wastes space in the release version.
122
123functor ParserGen(structure LrTable : LR_TABLE
124		  structure Stream : STREAM) : LR_PARSER =
125*)
126
127structure LrParser :> LR_PARSER =
128   struct
129      structure LrTable = LrTable
130      structure Stream = Stream
131
132      structure Token : TOKEN =
133	struct
134	    structure LrTable = LrTable
135	    datatype ('a,'b) token = TOKEN of LrTable.term * ('a * 'b * 'b)
136	    val sameToken = fn (TOKEN(t,_),TOKEN(t',_)) => t=t'
137        end
138
139      open LrTable
140      open Token
141
142      val DEBUG1 = false
143      val DEBUG2 = false
144      exception ParseError
145      exception ParseImpossible of int
146
147      structure Fifo :> FIFO =
148        struct
149	  type 'a queue = ('a list * 'a list)
150	  val empty = (nil,nil)
151	  exception Empty
152	  fun get(a::x, y) = (a, (x,y))
153	    | get(nil, nil) = raise Empty
154	    | get(nil, y) = get(rev y, nil)
155 	  fun put(a,(x,y)) = (x,a::y)
156        end
157
158      type ('a,'b) elem = (state * ('a * 'b * 'b))
159      type ('a,'b) stack = ('a,'b) elem list
160      type ('a,'b) lexv = ('a,'b) token
161      type ('a,'b) lexpair = ('a,'b) lexv * (('a,'b) lexv Stream.stream)
162      type ('a,'b) distanceParse =
163		 ('a,'b) lexpair *
164		 ('a,'b) stack *
165		 (('a,'b) stack * ('a,'b) lexpair) Fifo.queue *
166		 int ->
167		   ('a,'b) lexpair *
168		   ('a,'b) stack *
169		   (('a,'b) stack * ('a,'b) lexpair) Fifo.queue *
170		   int *
171		   action option
172
173      type ('a,'b) ecRecord =
174	 {is_keyword : term -> bool,
175          preferred_change : (term list * term list) list,
176	  error : string * 'b * 'b -> unit,
177	  errtermvalue : term -> 'a,
178	  terms : term list,
179	  showTerminal : term -> string,
180	  noShift : term -> bool}
181
182      local
183	 val print = fn s => TextIO.output(TextIO.stdOut,s)
184	 val println = fn s => (print s; print "\n")
185	 val showState = fn (STATE s) => "STATE " ^ (Int.toString s)
186      in
187        fun printStack(stack: ('a,'b) stack, n: int) =
188         case stack
189           of (state,_) :: rest =>
190                 (print("\t" ^ Int.toString n ^ ": ");
191                  println(showState state);
192                  printStack(rest, n+1))
193            | nil => ()
194
195        fun prAction showTerminal
196		 (stack as (state,_) :: _, (TOKEN (term,_), _), action) =
197             (println "Parse: state stack:";
198              printStack(stack, 0);
199              print("       state="
200                         ^ showState state
201                         ^ " next="
202                         ^ showTerminal term
203                         ^ " action="
204                        );
205              case action
206                of SHIFT state => println ("SHIFT " ^ (showState state))
207                 | REDUCE i => println ("REDUCE " ^ (Int.toString i))
208                 | ERROR => println "ERROR"
209		 | ACCEPT => println "ACCEPT")
210        | prAction _ _ = ()
211     end
212
213    (* ssParse: parser which maintains the queue of (state * lexvalues) in a
214	steady-state.  It takes a table, showTerminal function, saction
215	function, and fixError function.  It parses until an ACCEPT is
216	encountered, or an exception is raised.  When an error is encountered,
217	fixError is called with the arguments of parseStep (lexv,stack,and
218	queue).  It returns the lexv, and a new stack and queue adjusted so
219	that the lexv can be parsed *)
220
221    val ssParse =
222      fn (table,showTerminal,saction,fixError,arg) =>
223	let val prAction = prAction showTerminal
224	    val action = LrTable.action table
225	    val goto = LrTable.goto table
226	    fun parseStep(args as
227			 (lexPair as (TOKEN (terminal, value as (_,leftPos,_)),
228				      lexer
229				      ),
230			  stack as (state,_) :: _,
231			  queue)) =
232	      let val nextAction = action (state,terminal)
233	          val _ = if DEBUG1 then prAction(stack,lexPair,nextAction)
234			  else ()
235	      in case nextAction
236		 of SHIFT s =>
237		  let val newStack = (s,value) :: stack
238		      val newLexPair = Stream.get lexer
239		      val (_,newQueue) =Fifo.get(Fifo.put((newStack,newLexPair),
240							    queue))
241		  in parseStep(newLexPair,(s,value)::stack,newQueue)
242		  end
243		 | REDUCE i =>
244		     (case saction(i,leftPos,stack,arg)
245		      of (nonterm,value,stack as (state,_) :: _) =>
246		          parseStep(lexPair,(goto(state,nonterm),value)::stack,
247				    queue)
248		       | _ => raise (ParseImpossible 197))
249		 | ERROR => parseStep(fixError args)
250		 | ACCEPT =>
251			(case stack
252			 of (_,(topvalue,_,_)) :: _ =>
253				let val (token,restLexer) = lexPair
254				in (topvalue,Stream.cons(token,restLexer))
255				end
256			  | _ => raise (ParseImpossible 202))
257	      end
258	    | parseStep _ = raise (ParseImpossible 204)
259	in parseStep
260	end
261
262    (*  distanceParse: parse until n tokens are shifted, or accept or
263	error are encountered.  Takes a table, showTerminal function, and
264	semantic action function.  Returns a parser which takes a lexPair
265	(lex result * lexer), a state stack, a queue, and a distance
266	(must be > 0) to parse.  The parser returns a new lex-value, a stack
267	with the nth token shifted on top, a queue, a distance, and action
268	option. *)
269
270    val distanceParse =
271      fn (table,showTerminal,saction,arg) =>
272	let val prAction = prAction showTerminal
273	    val action = LrTable.action table
274	    val goto = LrTable.goto table
275	    fun parseStep(lexPair,stack,queue,0) = (lexPair,stack,queue,0,NONE)
276	      | parseStep(lexPair as (TOKEN (terminal, value as (_,leftPos,_)),
277				      lexer
278				     ),
279			  stack as (state,_) :: _,
280			  queue,distance) =
281	      let val nextAction = action(state,terminal)
282	          val _ = if DEBUG1 then prAction(stack,lexPair,nextAction)
283			  else ()
284	      in case nextAction
285		 of SHIFT s =>
286		  let val newStack = (s,value) :: stack
287		      val newLexPair = Stream.get lexer
288		  in parseStep(newLexPair,(s,value)::stack,
289			       Fifo.put((newStack,newLexPair),queue),distance-1)
290		  end
291		 | REDUCE i =>
292		    (case saction(i,leftPos,stack,arg)
293		      of (nonterm,value,stack as (state,_) :: _) =>
294		         parseStep(lexPair,(goto(state,nonterm),value)::stack,
295				 queue,distance)
296		      | _ => raise (ParseImpossible 240))
297		 | ERROR => (lexPair,stack,queue,distance,SOME nextAction)
298		 | ACCEPT => (lexPair,stack,queue,distance,SOME nextAction)
299	      end
300	   | parseStep _ = raise (ParseImpossible 242)
301	in parseStep : ('_a,'_b) distanceParse
302	end
303
304(* mkFixError: function to create fixError function which adjusts parser state
305   so that parse may continue in the presence of an error *)
306
307fun mkFixError({is_keyword,terms,errtermvalue,
308	      preferred_change,noShift,
309	      showTerminal,error,...} : ('_a,'_b) ecRecord,
310	     distanceParse : ('_a,'_b) distanceParse,
311	     minAdvance,maxAdvance)
312
313            ((TOKEN (term, (_, leftPos, _)), _), _, queue) =
314    let val _ = if DEBUG2 then
315			error("syntax error found at " ^ (showTerminal term),
316			      leftPos,leftPos)
317		else ()
318
319        fun tokAt(t,p) = TOKEN(t,(errtermvalue t,p,p))
320
321	val minDelta = 3
322
323	(* pull all the state * lexv elements from the queue *)
324
325	val stateList =
326	   let fun f q = let val (elem,newQueue) = Fifo.get q
327			 in elem :: (f newQueue)
328			 end handle Fifo.Empty => nil
329	   in f queue
330	   end
331
332	(* now number elements of stateList, giving distance from
333	   error token *)
334
335	val (_, numStateList) =
336	      List.foldr (fn (a,(num,r)) => (num+1,(a,num)::r)) (0, []) stateList
337
338	(* Represent the set of potential changes as a linked list.
339
340	   Values of datatype Change hold information about a potential change.
341
342	   oper = oper to be applied
343	   pos = the # of the element in stateList that would be altered.
344	   distance = the number of tokens beyond the error token which the
345	     change allows us to parse.
346	   new = new terminal * value pair at that point
347	   orig = original terminal * value pair at the point being changed.
348	 *)
349
350	datatype ('a,'b) change = CHANGE of
351	   {pos : int, distance : int, leftPos: 'b, rightPos: 'b,
352	    new : ('a,'b) lexv list, orig : ('a,'b) lexv list}
353
354
355         val showTerms = concat o map (fn TOKEN(t,_) => " " ^ showTerminal t)
356
357(* parse: given a lexPair, a stack, and the distance from the error
358   token, return the distance past the error token that we are able to parse.*)
359
360	fun parse (lexPair,stack,queuePos : int) =
361	    case distanceParse(lexPair,stack,Fifo.empty,queuePos+maxAdvance+1)
362             of (_,_,_,distance,SOME ACCEPT) =>
363		        if maxAdvance-distance-1 >= 0
364			    then maxAdvance
365			    else maxAdvance-distance-1
366	      | (_,_,_,distance,_) => maxAdvance - distance - 1
367
368(* catList: concatenate results of scanning list *)
369
370	fun catList l f = List.foldr (fn(a,r)=> f a @ r) [] l
371
372        fun keywordsDelta new = if List.exists (fn(TOKEN(t,_))=>is_keyword t) new
373	               then minDelta else 0
374
375        fun tryChange{lex,stack,pos,leftPos,rightPos,orig,new} =
376	     let val lex' = List.foldr (fn (t',p)=>(t',Stream.cons p)) lex new
377		 val distance = parse(lex',stack,
378				      pos + List.length new - List.length orig)
379	      in if distance >= minAdvance + keywordsDelta new
380		   then [CHANGE{pos=pos,leftPos=leftPos,rightPos=rightPos,
381				distance=distance,orig=orig,new=new}]
382		   else []
383	     end
384
385
386(* tryDelete: Try to delete n terminals.
387              Return single-element [success] or nil.
388	      Do not delete unshiftable terminals. *)
389
390
391    fun tryDelete n ((stack, lexPair as (TOKEN (_, (_, l, r)), _)), qPos) =
392	let fun del(0,accum,left,right,lexPair) =
393	          tryChange{lex=lexPair,stack=stack,
394			    pos=qPos,leftPos=left,rightPos=right,
395			    orig=rev accum, new=[]}
396	      | del(n,accum,left,_,(tok as TOKEN(term,(_,_,r)),lexer)) =
397		   if noShift term then []
398		   else del(n-1,tok::accum,left,r,Stream.get lexer)
399         in del(n,[],l,r,lexPair)
400        end
401
402(* tryInsert: try to insert tokens before the current terminal;
403       return a list of the successes  *)
404
405        fun tryInsert((stack,lexPair as (TOKEN(_,(_,l,_)),_)),queuePos) =
406	       catList terms (fn t =>
407		 tryChange{lex=lexPair,stack=stack,
408			   pos=queuePos,orig=[],new=[tokAt(t,l)],
409			   leftPos=l,rightPos=l})
410
411(* trySubst: try to substitute tokens for the current terminal;
412       return a list of the successes  *)
413
414        fun trySubst ((stack, (orig as TOKEN (term,(_,l,r)),lexer)),
415		      queuePos) =
416	      if noShift term then []
417	      else
418		  catList terms (fn t =>
419		      tryChange{lex=Stream.get lexer,stack=stack,
420				pos=queuePos,
421				leftPos=l,rightPos=r,orig=[orig],
422				new=[tokAt(t,r)]})
423
424     (* do_delete(toks,lexPair) tries to delete tokens "toks" from "lexPair".
425         If it succeeds, returns SOME(toks',l,r,lp), where
426	     toks' is the actual tokens (with positions and values) deleted,
427	     (l,r) are the (leftmost,rightmost) position of toks',
428	     lp is what remains of the stream after deletion
429     *)
430        fun do_delete(nil,lp as (TOKEN(_,(_,l,_)),_)) = SOME(nil,l,l,lp)
431          | do_delete([t],(tok as TOKEN(t',(_,l,r)),lp')) =
432	       if t=t'
433		   then SOME([tok],l,r,Stream.get lp')
434                   else NONE
435          | do_delete(t::rest,(tok as TOKEN(t',(_,l,_)),lp')) =
436	       if t=t'
437		   then case do_delete(rest,Stream.get lp')
438                         of SOME(deleted,_,r',lp'') =>
439			       SOME(tok::deleted,l,r',lp'')
440			  | NONE => NONE
441		   else NONE
442
443        fun tryPreferred((stack,lexPair),queuePos) =
444	    catList preferred_change (fn (delete,insert) =>
445	       if List.exists noShift delete then [] (* should give warning at
446						 parser-generation time *)
447               else case do_delete(delete,lexPair)
448                     of SOME(deleted,l,r,lp) =>
449			    tryChange{lex=lp,stack=stack,pos=queuePos,
450				      leftPos=l,rightPos=r,orig=deleted,
451				      new=map (fn t=>(tokAt(t,r))) insert}
452		      | NONE => [])
453
454	val changes = catList numStateList tryPreferred @
455	                catList numStateList tryInsert @
456			  catList numStateList trySubst @
457			    catList numStateList (tryDelete 1) @
458			      catList numStateList (tryDelete 2) @
459			        catList numStateList (tryDelete 3)
460
461	val findMaxDist = fn l =>
462	  foldr (fn (CHANGE {distance,...},high) => Int.max(distance,high)) 0 l
463
464(* maxDist: max distance past error taken that we could parse *)
465
466	val maxDist = findMaxDist changes
467
468(* remove changes which did not parse maxDist tokens past the error token *)
469
470        val changes = catList changes
471	      (fn(c as CHANGE{distance,...}) =>
472		  if distance=maxDist then [c] else [])
473
474      in case changes
475	  of (l as change :: _) =>
476	      let fun print_msg (CHANGE {new,orig,leftPos,rightPos,...}) =
477		  let val s =
478		      case (orig,new)
479			  of (_::_,[]) => "deleting " ^ (showTerms orig)
480	                   | ([],_::_) => "inserting " ^ (showTerms new)
481			   | _ => "replacing " ^ (showTerms orig) ^
482				 " with " ^ (showTerms new)
483		  in error ("syntax error: " ^ s,leftPos,rightPos)
484		  end
485
486		  val _ =
487		      (if length l > 1 andalso DEBUG2 then
488			   (print "multiple fixes possible; could fix it by:\n";
489			    app print_msg l;
490			    print "chosen correction:\n")
491		       else ();
492		       print_msg change)
493
494		  (* findNth: find nth queue entry from the error
495		   entry.  Returns the Nth queue entry and the  portion of
496		   the queue from the beginning to the nth-1 entry.  The
497		   error entry is at the end of the queue.
498
499		   Examples:
500
501		   queue = a b c d e
502		   findNth 0 = (e,a b c d)
503		   findNth 1 =  (d,a b c)
504		   *)
505
506		  val findNth = fn n =>
507		      let fun f (h::t,0) = (h,rev t)
508			    | f (_::t,n) = f(t,n-1)
509			    | f (nil,_) = let exception FindNth
510					  in raise FindNth
511					  end
512		      in f (rev stateList,n)
513		      end
514
515		  val CHANGE {pos,orig,new,...} = change
516		  val (last,queueFront) = findNth pos
517		  val (stack,lexPair) = last
518
519		  val lp1 = foldl(fn (_,(_,r)) => Stream.get r) lexPair orig
520		  val lp2 = foldr(fn(t,r)=>(t,Stream.cons r)) lp1 new
521
522		  val restQueue =
523		      Fifo.put((stack,lp2),
524			       foldl Fifo.put Fifo.empty queueFront)
525
526		  val (lexPair,stack,queue,_,_) =
527		      distanceParse(lp2,stack,restQueue,pos)
528
529	      in (lexPair,stack,queue)
530	      end
531	| nil => (error("syntax error found at " ^ (showTerminal term),
532			leftPos,leftPos); raise ParseError)
533    end
534
535   val parse = fn {arg,table,lexer,saction,void,lookahead,
536		   ec=ec as {showTerminal,...} : ('_a,'_b) ecRecord} =>
537	let val distance = 15   (* defer distance tokens *)
538	    val minAdvance = 1  (* must parse at least 1 token past error *)
539	    val maxAdvance = Int.max(lookahead,0)(* max distance for parse check *)
540	    val lexPair = Stream.get lexer
541	    val (TOKEN (_,(_,leftPos,_)),_) = lexPair
542	    val startStack = [(initialState table,(void,leftPos,leftPos))]
543	    val startQueue = Fifo.put((startStack,lexPair),Fifo.empty)
544	    val distanceParse = distanceParse(table,showTerminal,saction,arg)
545	    val fixError = mkFixError(ec,distanceParse,minAdvance,maxAdvance)
546	    val ssParse = ssParse(table,showTerminal,saction,fixError,arg)
547	    fun loop (lexPair,stack,queue,_,SOME ACCEPT) =
548		   ssParse(lexPair,stack,queue)
549	      | loop (lexPair,stack,queue,0,_) = ssParse(lexPair,stack,queue)
550	      | loop (lexPair,stack,queue,distance,SOME ERROR) =
551		 let val (lexPair,stack,queue) = fixError(lexPair,stack,queue)
552		 in loop (distanceParse(lexPair,stack,queue,distance))
553		 end
554	      | loop _ = let exception ParseInternal
555			 in raise ParseInternal
556			 end
557	in loop (distanceParse(lexPair,startStack,startQueue,distance))
558	end
559 end;
560
561(* drt (12/15/89) -- needed only when the code above is functorized
562
563structure LrParser = ParserGen(structure LrTable=LrTable
564			     structure Stream=Stream);
565*)
566