1(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *)
2
3(* parser.sml:  This is a parser driver for LR tables with an error-recovery
4   routine added to it.  The routine used is described in detail in this
5   article:
6
7	'A Practical Method for LR and LL Syntactic Error Diagnosis and
8	 Recovery', by M. Burke and G. Fisher, ACM Transactions on
9	 Programming Langauges and Systems, Vol. 9, No. 2, April 1987,
10	 pp. 164-197.
11
12    This program is an implementation is the partial, deferred method discussed
13    in the article.  The algorithm and data structures used in the program
14    are described below.  
15
16    This program assumes that all semantic actions are delayed.  A semantic
17    action should produce a function from unit -> value instead of producing the
18    normal value.  The parser returns the semantic value on the top of the
19    stack when accept is encountered.  The user can deconstruct this value
20    and apply the unit -> value function in it to get the answer.
21
22    It also assumes that the lexer is a lazy stream.
23
24    Data Structures:
25    ----------------
26	
27	* The parser:
28
29	   The state stack has the type
30
31		 (state * (semantic value * line # * line #)) list
32
33	   The parser keeps a queue of (state stack * lexer pair).  A lexer pair
34	 consists of a terminal * value pair and a lexer.  This allows the 
35	 parser to reconstruct the states for terminals to the left of a
36	 syntax error, and attempt to make error corrections there.
37
38	   The queue consists of a pair of lists (x,y).  New additions to
39	 the queue are cons'ed onto y.  The first element of x is the top
40	 of the queue.  If x is nil, then y is reversed and used
41	 in place of x.
42
43    Algorithm:
44    ----------
45
46	* The steady-state parser:  
47
48	    This parser keeps the length of the queue of state stacks at
49	a steady state by always removing an element from the front when
50	another element is placed on the end.
51
52	    It has these arguments:
53
54	   stack: current stack
55	   queue: value of the queue
56	   lexPair ((terminal,value),lex stream)
57
58	When SHIFT is encountered, the state to shift to and the value are
59	are pushed onto the state stack.  The state stack and lexPair are
60	placed on the queue.  The front element of the queue is removed.
61
62	When REDUCTION is encountered, the rule is applied to the current
63	stack to yield a triple (nonterm,value,new stack).  A new
64	stack is formed by adding (goto(top state of stack,nonterm),value)
65	to the stack.
66
67	When ACCEPT is encountered, the top value from the stack and the
68	lexer are returned.
69
70	When an ERROR is encountered, fixError is called.  FixError
71	takes the arguments to the parser, fixes the error if possible and
72        returns a new set of arguments.
73
74	* The distance-parser:
75
76	This parser includes an additional argument distance.  It pushes
77	elements on the queue until it has parsed distance tokens, or an
78	ACCEPT or ERROR occurs.  It returns a stack, lexer, the number of
79	tokens left unparsed, a queue, and an action option.
80*)
81
82signature FIFO = 
83  sig type 'a queue
84      val empty : 'a queue
85      exception Empty
86      val get : 'a queue -> 'a * 'a queue
87      val put : 'a * 'a queue -> 'a queue
88  end
89
90(* drt (12/15/89) -- the functor should be used in development work, but
91   it wastes space in the release version.
92
93functor ParserGen(structure LrTable : LR_TABLE
94		  structure Stream : STREAM) : LR_PARSER =
95*)
96
97structure LrParser :> LR_PARSER =
98   struct
99      structure LrTable = LrTable
100      structure Stream = Stream
101
102      fun eqT (LrTable.T i, LrTable.T i') = i = i'
103
104      structure Token : TOKEN =
105	struct
106	    structure LrTable = LrTable
107	    datatype ('a,'b) token = TOKEN of LrTable.term * ('a * 'b * 'b)
108	    val sameToken = fn (TOKEN(t,_),TOKEN(t',_)) => eqT (t,t')
109        end
110
111      open LrTable
112      open Token
113
114      val DEBUG1 = false
115      val DEBUG2 = false
116      exception ParseError
117      exception ParseImpossible of int
118
119      structure Fifo :> FIFO =
120        struct
121	  type 'a queue = ('a list * 'a list)
122	  val empty = (nil,nil)
123	  exception Empty
124	  fun get(a::x, y) = (a, (x,y))
125	    | get(nil, nil) = raise Empty
126	    | get(nil, y) = get(rev y, nil)
127 	  fun put(a,(x,y)) = (x,a::y)
128        end
129
130      type ('a,'b) elem = (state * ('a * 'b * 'b))
131      type ('a,'b) stack = ('a,'b) elem list
132      type ('a,'b) lexv = ('a,'b) token
133      type ('a,'b) lexpair = ('a,'b) lexv * (('a,'b) lexv Stream.stream)
134      type ('a,'b) distanceParse =
135		 ('a,'b) lexpair *
136		 ('a,'b) stack * 
137		 (('a,'b) stack * ('a,'b) lexpair) Fifo.queue *
138		 int ->
139		   ('a,'b) lexpair *
140		   ('a,'b) stack * 
141		   (('a,'b) stack * ('a,'b) lexpair) Fifo.queue *
142		   int *
143		   action option
144
145      type ('a,'b) ecRecord =
146	 {is_keyword : term -> bool,
147          preferred_change : (term list * term list) list,
148	  error : string * 'b * 'b -> unit,
149	  errtermvalue : term -> 'a,
150	  terms : term list,
151	  showTerminal : term -> string,
152	  noShift : term -> bool}
153
154      local 
155	 val print = fn s => TextIO.output(TextIO.stdOut,s)
156	 val println = fn s => (print s; print "\n")
157	 val showState = fn (STATE s) => "STATE " ^ (Int.toString s)
158      in
159        fun printStack(stack: ('a,'b) stack, n: int) =
160         case stack
161           of (state,_) :: rest =>
162                 (print("\t" ^ Int.toString n ^ ": ");
163                  println(showState state);
164                  printStack(rest, n+1))
165            | nil => ()
166                
167        fun prAction showTerminal
168		 (stack as (state,_) :: _, next as (TOKEN (term,_),_), action) =
169             (println "Parse: state stack:";
170              printStack(stack, 0);
171              print("       state="
172                         ^ showState state	
173                         ^ " next="
174                         ^ showTerminal term
175                         ^ " action="
176                        );
177              case action
178                of SHIFT state => println ("SHIFT " ^ (showState state))
179                 | REDUCE i => println ("REDUCE " ^ (Int.toString i))
180                 | ERROR => println "ERROR"
181		 | ACCEPT => println "ACCEPT")
182        | prAction _ (_,_,action) = ()
183     end
184
185    (* ssParse: parser which maintains the queue of (state * lexvalues) in a
186	steady-state.  It takes a table, showTerminal function, saction
187	function, and fixError function.  It parses until an ACCEPT is
188	encountered, or an exception is raised.  When an error is encountered,
189	fixError is called with the arguments of parseStep (lexv,stack,and
190	queue).  It returns the lexv, and a new stack and queue adjusted so
191	that the lexv can be parsed *)
192	
193    val ssParse =
194      fn (table,showTerminal,saction,fixError,arg) =>
195	let val prAction = prAction showTerminal
196	    val action = LrTable.action table
197	    val goto = LrTable.goto table
198	    fun parseStep(args as
199			 (lexPair as (TOKEN (terminal, value as (_,leftPos,_)),
200				      lexer
201				      ),
202			  stack as (state,_) :: _,
203			  queue)) =
204	      let val nextAction = action (state,terminal)
205	          val _ = if DEBUG1 then prAction(stack,lexPair,nextAction)
206			  else ()
207	      in case nextAction
208		 of SHIFT s =>
209		  let val newStack = (s,value) :: stack
210		      val newLexPair = Stream.get lexer
211		      val (_,newQueue) =Fifo.get(Fifo.put((newStack,newLexPair),
212							    queue))
213		  in parseStep(newLexPair,(s,value)::stack,newQueue)
214		  end
215		 | REDUCE i =>
216		     (case saction(i,leftPos,stack,arg)
217		      of (nonterm,value,stack as (state,_) :: _) =>
218		          parseStep(lexPair,(goto(state,nonterm),value)::stack,
219				    queue)
220		       | _ => raise (ParseImpossible 197))
221		 | ERROR => parseStep(fixError args)
222		 | ACCEPT => 
223			(case stack
224			 of (_,(topvalue,_,_)) :: _ =>
225				let val (token,restLexer) = lexPair
226				in (topvalue,Stream.cons(token,restLexer))
227				end
228			  | _ => raise (ParseImpossible 202))
229	      end
230	    | parseStep _ = raise (ParseImpossible 204)
231	in parseStep
232	end
233
234    (*  distanceParse: parse until n tokens are shifted, or accept or
235	error are encountered.  Takes a table, showTerminal function, and
236	semantic action function.  Returns a parser which takes a lexPair
237	(lex result * lexer), a state stack, a queue, and a distance
238	(must be > 0) to parse.  The parser returns a new lex-value, a stack
239	with the nth token shifted on top, a queue, a distance, and action
240	option. *)
241
242    val distanceParse =
243      fn (table,showTerminal,saction,arg) =>
244	let val prAction = prAction showTerminal
245	    val action = LrTable.action table
246	    val goto = LrTable.goto table
247	    fun parseStep(lexPair,stack,queue,0) = (lexPair,stack,queue,0,NONE)
248	      | parseStep(lexPair as (TOKEN (terminal, value as (_,leftPos,_)),
249				      lexer
250				     ),
251			  stack as (state,_) :: _,
252			  queue,distance) =
253	      let val nextAction = action(state,terminal)
254	          val _ = if DEBUG1 then prAction(stack,lexPair,nextAction)
255			  else ()
256	      in case nextAction
257		 of SHIFT s =>
258		  let val newStack = (s,value) :: stack
259		      val newLexPair = Stream.get lexer
260		  in parseStep(newLexPair,(s,value)::stack,
261			       Fifo.put((newStack,newLexPair),queue),distance-1)
262		  end
263		 | REDUCE i =>
264		    (case saction(i,leftPos,stack,arg)
265		      of (nonterm,value,stack as (state,_) :: _) =>
266		         parseStep(lexPair,(goto(state,nonterm),value)::stack,
267				 queue,distance)
268		      | _ => raise (ParseImpossible 240))
269		 | ERROR => (lexPair,stack,queue,distance,SOME nextAction)
270		 | ACCEPT => (lexPair,stack,queue,distance,SOME nextAction)
271	      end
272	   | parseStep _ = raise (ParseImpossible 242)
273	in parseStep : ('_a,'_b) distanceParse 
274	end
275
276(* mkFixError: function to create fixError function which adjusts parser state
277   so that parse may continue in the presence of an error *)
278
279fun mkFixError({is_keyword,terms,errtermvalue,
280	      preferred_change,noShift,
281	      showTerminal,error,...} : ('_a,'_b) ecRecord,
282	     distanceParse : ('_a,'_b) distanceParse,
283	     minAdvance,maxAdvance) 
284
285            (lexv as (TOKEN (term,value as (_,leftPos,_)),_),stack,queue) =
286    let val _ = if DEBUG2 then
287			error("syntax error found at " ^ (showTerminal term),
288			      leftPos,leftPos)
289		else ()
290
291        fun tokAt(t,p) = TOKEN(t,(errtermvalue t,p,p))
292
293	val minDelta = 3
294
295	(* pull all the state * lexv elements from the queue *)
296
297	val stateList = 
298	   let fun f q = let val (elem,newQueue) = Fifo.get q
299			 in elem :: (f newQueue)
300			 end handle Fifo.Empty => nil
301	   in f queue
302	   end
303
304	(* now number elements of stateList, giving distance from
305	   error token *)
306
307	val (_, numStateList) =
308	      List.foldr (fn (a,(num,r)) => (num+1,(a,num)::r)) (0, []) stateList
309
310	(* Represent the set of potential changes as a linked list.
311
312	   Values of datatype Change hold information about a potential change.
313
314	   oper = oper to be applied
315	   pos = the # of the element in stateList that would be altered.
316	   distance = the number of tokens beyond the error token which the
317	     change allows us to parse.
318	   new = new terminal * value pair at that point
319	   orig = original terminal * value pair at the point being changed.
320	 *)
321
322	datatype ('a,'b) change = CHANGE of
323	   {pos : int, distance : int, leftPos: 'b, rightPos: 'b,
324	    new : ('a,'b) lexv list, orig : ('a,'b) lexv list}
325
326
327         val showTerms = concat o map (fn TOKEN(t,_) => " " ^ showTerminal t)
328
329	 val printChange = fn c =>
330	  let val CHANGE {distance,new,orig,pos,...} = c
331	  in (print ("{distance= " ^ (Int.toString distance));
332	      print (",orig ="); print(showTerms orig);
333	      print (",new ="); print(showTerms new);
334	      print (",pos= " ^ (Int.toString pos));
335	      print "}\n")
336	  end
337
338	val printChangeList = app printChange
339
340(* parse: given a lexPair, a stack, and the distance from the error
341   token, return the distance past the error token that we are able to parse.*)
342
343	fun parse (lexPair,stack,queuePos : int) =
344	    case distanceParse(lexPair,stack,Fifo.empty,queuePos+maxAdvance+1)
345             of (_,_,_,distance,SOME ACCEPT) => 
346		        if maxAdvance-distance-1 >= 0 
347			    then maxAdvance 
348			    else maxAdvance-distance-1
349	      | (_,_,_,distance,_) => maxAdvance - distance - 1
350
351(* catList: concatenate results of scanning list *)
352
353	fun catList l f = List.foldr (fn(a,r)=> f a @ r) [] l
354
355        fun keywordsDelta new = if List.exists (fn(TOKEN(t,_))=>is_keyword t) new
356	               then minDelta else 0
357
358        fun tryChange{lex,stack,pos,leftPos,rightPos,orig,new} =
359	     let val lex' = List.foldr (fn (t',p)=>(t',Stream.cons p)) lex new
360		 val distance = parse(lex',stack,pos+length new-length orig)
361	      in if distance >= minAdvance + keywordsDelta new 
362		   then [CHANGE{pos=pos,leftPos=leftPos,rightPos=rightPos,
363				distance=distance,orig=orig,new=new}] 
364		   else []
365	     end
366
367
368(* tryDelete: Try to delete n terminals.
369              Return single-element [success] or nil.
370	      Do not delete unshiftable terminals. *)
371
372
373    fun tryDelete n ((stack,lexPair as (TOKEN(term,(_,l,r)),_)),qPos) =
374	let fun del(0,accum,left,right,lexPair) =
375	          tryChange{lex=lexPair,stack=stack,
376			    pos=qPos,leftPos=left,rightPos=right,
377			    orig=rev accum, new=[]}
378	      | del(n,accum,left,right,(tok as TOKEN(term,(_,_,r)),lexer)) =
379		   if noShift term then []
380		   else del(n-1,tok::accum,left,r,Stream.get lexer)
381         in del(n,[],l,r,lexPair)
382        end
383
384(* tryInsert: try to insert tokens before the current terminal;
385       return a list of the successes  *)
386
387        fun tryInsert((stack,lexPair as (TOKEN(_,(_,l,_)),_)),queuePos) =
388	       catList terms (fn t =>
389		 tryChange{lex=lexPair,stack=stack,
390			   pos=queuePos,orig=[],new=[tokAt(t,l)],
391			   leftPos=l,rightPos=l})
392			      
393(* trySubst: try to substitute tokens for the current terminal;
394       return a list of the successes  *)
395
396        fun trySubst ((stack,lexPair as (orig as TOKEN (term,(_,l,r)),lexer)),
397		      queuePos) =
398	      if noShift term then []
399	      else
400		  catList terms (fn t =>
401		      tryChange{lex=Stream.get lexer,stack=stack,
402				pos=queuePos,
403				leftPos=l,rightPos=r,orig=[orig],
404				new=[tokAt(t,r)]})
405
406     (* do_delete(toks,lexPair) tries to delete tokens "toks" from "lexPair".
407         If it succeeds, returns SOME(toks',l,r,lp), where
408	     toks' is the actual tokens (with positions and values) deleted,
409	     (l,r) are the (leftmost,rightmost) position of toks', 
410	     lp is what remains of the stream after deletion 
411     *)
412        fun do_delete(nil,lp as (TOKEN(_,(_,l,_)),_)) = SOME(nil,l,l,lp)
413          | do_delete([t],(tok as TOKEN(t',(_,l,r)),lp')) =
414	       if eqT (t, t')
415		   then SOME([tok],l,r,Stream.get lp')
416                   else NONE
417          | do_delete(t::rest,(tok as TOKEN(t',(_,l,r)),lp')) =
418	       if eqT (t,t')
419		   then case do_delete(rest,Stream.get lp')
420                         of SOME(deleted,l',r',lp'') =>
421			       SOME(tok::deleted,l,r',lp'')
422			  | NONE => NONE
423		   else NONE
424			     
425        fun tryPreferred((stack,lexPair),queuePos) =
426	    catList preferred_change (fn (delete,insert) =>
427	       if List.exists noShift delete then [] (* should give warning at
428						 parser-generation time *)
429               else case do_delete(delete,lexPair)
430                     of SOME(deleted,l,r,lp) => 
431			    tryChange{lex=lp,stack=stack,pos=queuePos,
432				      leftPos=l,rightPos=r,orig=deleted,
433				      new=map (fn t=>(tokAt(t,r))) insert}
434		      | NONE => [])
435
436	val changes = catList numStateList tryPreferred @
437	                catList numStateList tryInsert @
438			  catList numStateList trySubst @
439			    catList numStateList (tryDelete 1) @
440			      catList numStateList (tryDelete 2) @
441			        catList numStateList (tryDelete 3)
442
443	val findMaxDist = fn l => 
444	  foldr (fn (CHANGE {distance,...},high) => Int.max(distance,high)) 0 l
445
446(* maxDist: max distance past error taken that we could parse *)
447
448	val maxDist = findMaxDist changes
449
450(* remove changes which did not parse maxDist tokens past the error token *)
451
452        val changes = catList changes 
453	      (fn(c as CHANGE{distance,...}) => 
454		  if distance=maxDist then [c] else [])
455
456      in case changes 
457	  of (l as change :: _) =>
458	      let fun print_msg (CHANGE {new,orig,leftPos,rightPos,...}) =
459		  let val s = 
460		      case (orig,new)
461			  of (_::_,[]) => "deleting " ^ (showTerms orig)
462	                   | ([],_::_) => "inserting " ^ (showTerms new)
463			   | _ => "replacing " ^ (showTerms orig) ^
464				 " with " ^ (showTerms new)
465		  in error ("syntax error: " ^ s,leftPos,rightPos)
466		  end
467		   
468		  val _ = 
469		      (if length l > 1 andalso DEBUG2 then
470			   (print "multiple fixes possible; could fix it by:\n";
471			    app print_msg l;
472			    print "chosen correction:\n")
473		       else ();
474		       print_msg change)
475
476		  (* findNth: find nth queue entry from the error
477		   entry.  Returns the Nth queue entry and the  portion of
478		   the queue from the beginning to the nth-1 entry.  The
479		   error entry is at the end of the queue.
480
481		   Examples:
482
483		   queue = a b c d e
484		   findNth 0 = (e,a b c d)
485		   findNth 1 =  (d,a b c)
486		   *)
487
488		  val findNth = fn n =>
489		      let fun f (h::t,0) = (h,rev t)
490			    | f (h::t,n) = f(t,n-1)
491			    | f (nil,_) = let exception FindNth
492					  in raise FindNth
493					  end
494		      in f (rev stateList,n)
495		      end
496		
497		  val CHANGE {pos,orig,new,...} = change
498		  val (last,queueFront) = findNth pos
499		  val (stack,lexPair) = last
500
501		  val lp1 = foldl(fn (_,(_,r)) => Stream.get r) lexPair orig
502		  val lp2 = foldr(fn(t,r)=>(t,Stream.cons r)) lp1 new
503
504		  val restQueue = 
505		      Fifo.put((stack,lp2),
506			       foldl Fifo.put Fifo.empty queueFront)
507
508		  val (lexPair,stack,queue,_,_) =
509		      distanceParse(lp2,stack,restQueue,pos)
510
511	      in (lexPair,stack,queue)
512	      end
513	| nil => (error("syntax error found at " ^ (showTerminal term),
514			leftPos,leftPos); raise ParseError)
515    end
516
517   val parse = fn {arg,table,lexer,saction,void,lookahead,
518		   ec=ec as {showTerminal,...} : ('_a,'_b) ecRecord} =>
519	let val distance = 15   (* defer distance tokens *)
520	    val minAdvance = 1  (* must parse at least 1 token past error *)
521	    val maxAdvance = Int.max(lookahead,0)(* max distance for parse check *)
522	    val lexPair = Stream.get lexer
523	    val (TOKEN (_,(_,leftPos,_)),_) = lexPair
524	    val startStack = [(initialState table,(void,leftPos,leftPos))]
525	    val startQueue = Fifo.put((startStack,lexPair),Fifo.empty)
526	    val distanceParse = distanceParse(table,showTerminal,saction,arg)
527	    val fixError = mkFixError(ec,distanceParse,minAdvance,maxAdvance)
528	    val ssParse = ssParse(table,showTerminal,saction,fixError,arg)
529	    fun loop (lexPair,stack,queue,_,SOME ACCEPT) =
530		   ssParse(lexPair,stack,queue)
531	      | loop (lexPair,stack,queue,0,_) = ssParse(lexPair,stack,queue)
532	      | loop (lexPair,stack,queue,distance,SOME ERROR) =
533		 let val (lexPair,stack,queue) = fixError(lexPair,stack,queue)
534		 in loop (distanceParse(lexPair,stack,queue,distance))
535		 end
536	      | loop _ = let exception ParseInternal
537			 in raise ParseInternal
538			 end
539	in loop (distanceParse(lexPair,startStack,startQueue,distance))
540	end
541 end;
542
543