1(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi
2 *
3 * $Log$
4 * Revision 1.1  2006/06/22 07:40:27  michaeln
5 * Add a MoscowML compilable implementation of MLyacc, using the MLton sources
6 * as the base.
7 *
8 * Revision 1.1.1.1  1997/01/14 01:38:05  george
9 *   Version 109.24
10 *
11 * Revision 1.3  1996/10/03  03:37:12  jhr
12 * Qualified identifiers that are no-longer top-level (quot, rem, min, max).
13 *
14 * Revision 1.2  1996/02/26  15:02:35  george
15 *    print no longer overloaded.
16 *    use of makestring has been removed and replaced with Int.toString ..
17 *    use of IO replaced with TextIO
18 *
19 * Revision 1.1.1.1  1996/01/31  16:01:45  george
20 * Version 109
21 *
22 *)
23
24functor mkLalr ( structure IntGrammar : INTGRAMMAR
25		structure Core : CORE
26		structure Graph : LRGRAPH
27		structure Look: LOOK
28		sharing Graph.Core = Core
29		sharing Graph.IntGrammar = Core.IntGrammar =
30			Look.IntGrammar = IntGrammar) : LALR_GRAPH =
31    struct
32	open Array List
33	infix 9 sub
34	open IntGrammar.Grammar IntGrammar Core Graph Look
35	structure Graph = Graph
36	structure Core = Core
37	structure Grammar = IntGrammar.Grammar
38	structure IntGrammar = IntGrammar
39
40	datatype tmpcore = TMPCORE of (item * term list ref) list * int
41	datatype lcore = LCORE of (item * term list) list * int
42
43
44	 val prLcore =
45	  fn a as (SymbolToString,nontermToString,termToString,print) =>
46	    let val printItem = prItem (SymbolToString,nontermToString,print)
47		val printLookahead = prLook(termToString,print)
48	    in fn (LCORE (items,state)) =>
49		(print "\n";
50		 print "state ";
51		 print (Int.toString state);
52		 print " :\n\n";
53		 List.app (fn (item,lookahead) =>
54			(print "{";
55			 printItem item;
56			 print ",";
57			 printLookahead lookahead;
58			 print "}\n")) items)
59	    end
60
61	exception Lalr of int
62
63	structure ItemList = ListOrdSet
64		(struct
65		   type elem = item * term list ref
66		   val eq = fn ((a,_),(b,_)) => eqItem(a,b)
67		   val gt = fn ((a,_),(b,_)) => gtItem(a,b)
68		 end)
69
70	structure NontermSet = ListOrdSet
71		(struct
72		   type elem = nonterm
73		   val gt = gtNonterm
74		   val eq = eqNonterm
75		 end)
76
77(* NTL: nonterms with lookahead *)
78
79	structure NTL = RbOrdSet
80		(struct
81		   type elem = nonterm * term list
82		   val gt = fn ((i,_),(j,_)) => gtNonterm(i,j)
83		   val eq = fn ((i,_),(j,_)) => eqNonterm(i,j)
84		 end)
85
86	val DEBUG = false
87
88	val addLookahead  = fn {graph,nullable,first,eop,
89				rules,produces,nonterms,epsProds,
90				print,termToString,nontermToString} =>
91	  let
92
93		val eop = Look.make_set eop
94
95		val symbolToString = fn (TERM t) => termToString t
96				      | (NONTERM t) => nontermToString t
97
98		val print = if DEBUG then print
99			    else fn _ => ()
100
101		val prLook = if DEBUG then prLook (termToString,print)
102			     else fn _ => ()
103
104		val prNonterm = print o nontermToString
105
106		val prRule = if DEBUG
107			      then prRule(symbolToString,nontermToString,print)
108			      else fn _ => ()
109
110		val printInt = print o (Int.toString : int -> string)
111
112		val printItem = prItem(symbolToString,nontermToString,print)
113
114(* look_pos: position in the rhs of a rule at which we should start placing
115   lookahead ref cells, i.e. the minimum place at which A -> x .B y, where
116   B is a nonterminal and y =*=> epsilon, or A -> x. is true.  Positions are
117   given by the number of symbols before the place.  The place before the first
118   symbol is 0, etc. *)
119
120	     val look_pos =
121		 let val positions = array(length rules,0)
122
123(* rule_pos: calculate place in the rhs of a rule at which we should start
124   placing lookahead ref cells *)
125
126		      val rule_pos = fn (RULE {rhs,...}) =>
127			case (rev rhs)
128			  of nil => 0
129			   | (TERM t) :: r => length rhs
130			   | (l as (NONTERM n) :: r) =>
131
132			      (* f assumes that everything after n in the
133				 rule has proven to be nullable so far.
134				 Remember that the rhs has been reversed,
135				 implying that this is true initially *)
136
137					(* A -> .z t B y, where y is nullable *)
138
139			      let fun f (NONTERM b :: (r as (TERM _ :: _))) =
140					(length r)
141
142					(* A -> .z B C y *)
143
144				    | f (NONTERM c :: (r as (NONTERM b :: _))) =
145					 if nullable c then f r
146					 else (length r)
147
148					(* A -> .B y, where y is nullable *)
149
150				    | f (NONTERM b :: nil) = 0
151				    | f _ = raise Fail "f"
152			      in  f l
153			      end
154
155		 	val check_rule = fn (rule as RULE {num,...}) =>
156			    let val pos = rule_pos rule
157			    in (print "look_pos: ";
158			 	prRule rule;
159				print " = ";
160				printInt pos;
161				print "\n";
162				update(positions,num,rule_pos rule))
163			    end
164		   in app check_rule rules;
165		    fn RULE{num,...} => (positions sub num)
166		   end
167
168(* rest_is_null: true for items of the form A -> x .B y, where y is nullable *)
169
170	     val rest_is_null =
171		 fn (ITEM{rule,dot, rhsAfter=NONTERM _ :: _}) =>
172			 dot >= (look_pos rule)
173		  | _ => false
174
175(* map core to a new core including only items of the form A -> x. or
176   A -> x. B y, where y =*=> epsilon.  It also adds epsilon productions to the
177   core. Each item is given a ref cell to hold the lookahead nonterminals for
178   it.*)
179
180	      val map_core =
181		let val f = fn (item as ITEM {rhsAfter=nil,...},r) =>
182				(item,ref nil) :: r
183			     | (item,r) =>
184				 if (rest_is_null item)
185				    then (item,ref nil)::r
186				    else r
187		in fn (c as CORE (items,state)) =>
188		   let val epsItems =
189			   map  (fn rule=>(ITEM{rule=rule,dot=0,rhsAfter=nil},
190					   ref (nil : term list))
191				) (epsProds c)
192		   in TMPCORE(ItemList.union(List.foldr f [] items,epsItems),state)
193		   end
194		end
195
196	      val new_nodes = map map_core (nodes graph)
197
198	      exception Find
199
200(* findRef: state * item -> lookahead ref cell for item *)
201
202	      val findRef =
203		let val states = Array.fromList new_nodes
204		    val dummy = ref nil
205		in fn (state,item) =>
206		    let val TMPCORE (l,_) = states sub state
207		    in case ItemList.find((item,dummy),l)
208				   of SOME (_,look_ref) => look_ref
209				    | NONE => (print "find failed: state ";
210					       printInt state;
211					       print "\nitem =\n";
212					       printItem item;
213					       print "\nactual items =\n";
214					       app (fn (i,_) => (printItem i;
215						    print "\n")) l;
216						raise Find)
217		    end
218		end
219
220
221(* findRuleRefs: state -> rule -> lookahead refs for rule. *)
222
223	       val findRuleRefs =
224		 let val shift = shift graph
225		 in fn state =>
226			(* handle epsilon productions *)
227		  fn (rule as RULE {rhs=nil,...}) =>
228			 [findRef(state,ITEM{rule=rule,dot=0,rhsAfter=nil})]
229		   | (rule as RULE {rhs=sym::rest,...}) =>
230		   let	val pos = Int.max(look_pos rule,1)
231			fun scan'(state,nil,pos,result) =
232				findRef(state,ITEM{rule=rule,
233						   dot=pos,
234						   rhsAfter=nil}) :: result
235			  | scan'(state,rhs as sym::rest,pos,result) =
236				scan'(shift(state,sym), rest, pos+1,
237				      findRef(state,ITEM{rule=rule,
238							 dot=pos,
239							 rhsAfter=rhs})::result)
240
241(* find first item of the form A -> x .B y, where y =*=> epsilon and
242   x is not epsilon, or A -> x.  use scan' to pick up all refs after this
243   point *)
244
245			 fun scan(state,nil,_) =
246			   [findRef(state,ITEM{rule=rule,dot=pos,rhsAfter=nil})]
247			   | scan(state,rhs,0) = scan'(state,rhs,pos,nil)
248			   | scan(state,sym::rest,place) =
249				    scan(shift(state,sym),rest,place-1)
250
251		  in scan(shift(state,sym),rest,pos-1)
252		  end
253
254	     end
255
256(* function to compute for some nonterminal n the set of nonterminals A added
257   through the closure of nonterminal n such that n =c*=> .A x, where x is
258   nullable *)
259
260	      val nonterms_w_null = fn nt =>
261		  let val collect_nonterms = fn n =>
262		    List.foldr (fn (rule as RULE {rhs=rhs as NONTERM n :: _,...},r) =>
263			   (case
264			     (rest_is_null(ITEM {dot=0,rhsAfter=rhs,rule=rule}))
265				 of true => n :: r
266				  | false => r)
267			   | (_,r) => r) [] (produces n)
268		       fun dfs(a as (n,r)) =
269			 if (NontermSet.exists a) then r
270			 else List.foldr dfs (NontermSet.insert(n,r))
271				(collect_nonterms n)
272		  in dfs(nt,NontermSet.empty)
273		  end
274
275		val nonterms_w_null =
276		   let val data = array(nonterms,NontermSet.empty)
277		       fun f n = if n=nonterms then ()
278				 else (update(data,n,nonterms_w_null (NT n));
279				       f (n+1))
280		   in (f 0; fn (NT nt) => data sub nt)
281		   end
282
283(* look_info: for some nonterminal n the set of nonterms A added
284   through the closure of the nonterminal such that n =c+=> .Ax and the
285   lookahead accumlated for each nonterm A *)
286
287		val look_info = fn nt =>
288		   let val collect_nonterms = fn n =>
289		      List.foldr (fn (RULE {rhs=NONTERM n :: t,...},r) =>
290			     (case NTL.find ((n,nil),r)
291			      of SOME (key,data) =>
292			         NTL.insert((n,Look.union(data,first t)),r)
293			       | NONE => NTL.insert ((n,first t),r))
294			     | (_,r) => r)
295		            NTL.empty (produces n)
296			fun dfs(a as ((key1,data1),r)) =
297			  case (NTL.find a)
298			   of SOME (_,data2) =>
299			       NTL.insert((key1,Look.union(data1,data2)),r)
300			    | NONE => NTL.fold dfs (collect_nonterms key1)
301						   (NTL.insert a)
302		    in dfs((nt,nil),NTL.empty)
303		    end
304
305		val look_info =
306		  if not DEBUG then look_info
307		  else fn nt =>
308		       (print "look_info of "; prNonterm nt; print "=\n";
309			let val info = look_info nt
310			in (NTL.app (fn (nt,lookahead) =>
311				    (prNonterm nt; print ": "; prLook lookahead;
312				     print "\n\n")) info;
313			   info)
314			end)
315
316(* prop_look: propagate lookaheads for nonterms added in the closure of a
317   nonterm.  Lookaheads must be propagated from each nonterminal m to
318   all nonterminals { n | m =c+=> nx, where x=*=>epsilon} *)
319
320		  val prop_look = fn ntl =>
321		    let val upd_lookhd = fn new_look => fn (nt,r) =>
322			  case NTL.find ((nt,new_look),r)
323			  of SOME (_,old_look) =>
324			     NTL.insert((nt, Look.union(new_look,old_look)),r)
325			   | NONE => raise (Lalr 241)
326		         val upd_nonterm = fn ((nt,look),r) =>
327			   NontermSet.fold (upd_lookhd look)
328					   (nonterms_w_null nt) r
329		     in NTL.fold upd_nonterm ntl ntl
330		     end
331
332		val prop_look =
333		  if not DEBUG then prop_look
334		  else fn ntl =>
335		    (print "prop_look =\n";
336		     let val info = prop_look ntl
337		     in (NTL.app (fn (nt,lookahead) =>
338				    (prNonterm nt;
339				     print ": ";
340				     prLook lookahead;
341				     print "\n\n")) info; info)
342		     end)
343
344(* now put the information from these functions together.  Create a function
345   which takes a nonterminal n and returns a list of triplets of
346	 (a nonterm added through closure,
347	  the lookahead for the nonterm,
348	  whether the nonterm should include the lookahead for the nonterminal
349	  whose closure is being taken (i.e. first(y) for an item j of the
350	  form A -> x .n y and lookahead(j) if y =*=> epsilon)
351*)
352
353		 val closure_nonterms =
354		   let val data =
355			  array(nonterms,nil: (nonterm * term list * bool) list)
356		       val do_nonterm = fn i =>
357			let val nonterms_followed_by_null =
358				nonterms_w_null i
359			    val nonterms_added_through_closure =
360			      NTL.make_list (prop_look (look_info i))
361			    val result =
362			    map (fn (nt,l) =>
363			 (nt,l,NontermSet.exists (nt,nonterms_followed_by_null))
364				) nonterms_added_through_closure
365			 in if DEBUG then
366			       (print "closure_nonterms = ";
367				prNonterm i;
368				print "\n";
369				app (fn (nt,look,nullable) =>
370				  (prNonterm nt;
371				   print ":";
372				   prLook look;
373				   case nullable
374				     of false => print "(false)\n"
375				      | true => print "(true)\n")) result;
376				print "\n")
377			     else ();
378			     result
379			 end
380		        fun f i =
381			  if i=nonterms then ()
382			  else (update(data,i,do_nonterm (NT i)); f (i+1))
383			val _ = f 0
384		    in fn (NT i) => data sub i
385		    end
386
387(* add_nonterm_lookahead: Add lookahead to all completion items for rules added
388   when the closure of a given nonterm in some state is taken.  It returns
389   a list of lookahead refs to which the given nonterm's lookahead should
390   be propagated.   For each rule, it must trace the shift/gotos in the LR(0)
391   graph to find all items of the form A-> x .B y where y =*=> epsilon or
392   A -> x.
393*)
394
395		val add_nonterm_lookahead = fn (nt,state) =>
396		  let val f = fn ((nt,lookahead,nullable),r) =>
397			let val refs = map (findRuleRefs state) (produces nt)
398			    val refs = List.concat refs
399			    val _ = app (fn r =>
400				     r := (Look.union (!r,lookahead))) refs
401			in if nullable then refs @ r else r
402			end
403		 in List.foldr f [] (closure_nonterms nt)
404		 end
405
406(* scan_core: Scan a core for all items of the form A -> x .B y.  Applies
407   add_nonterm_lookahead to each such B, and then merges first(y) into
408   the list of refs returned by add_nonterm_lookahead.  It returns
409   a list of ref * ref list for all the items where y =*=> epsilon *)
410
411		val scan_core = fn (CORE (l,state)) =>
412		  let fun f ((item as ITEM{rhsAfter= NONTERM b :: y,
413					   dot,rule})::t,r) =
414			(case (add_nonterm_lookahead(b,state))
415			  of nil => r
416			   | l =>
417			    let val first_y = first y
418			        val newr  = if dot >= (look_pos rule)
419					then (findRef(state,item),l)::r
420					else r
421			    in (app (fn r =>
422					 r := Look.union(!r,first_y)) l;
423			        f (t,newr))
424			    end)
425			| f (_ :: t,r) = f (t,r)
426			| f (nil,r) = r
427		  in f (l,nil)
428		  end
429
430(* add end-of-parse symbols to set of items consisting of all items
431   immediately derived from the start symbol *)
432
433		val add_eop = fn (c as CORE (l,state),eop) =>
434		  let fun f (item as ITEM {rule,dot,...}) =
435		    let val refs = findRuleRefs state rule
436		    in
437
438(* first take care of kernal items.  Add the end-of-parse symbols to
439   the lookahead sets for these items.  Epsilon productions of the
440   start symbol do not need to be handled specially because they will
441   be in the kernal also *)
442
443		       app (fn r => r := Look.union(!r,eop)) refs;
444
445(* now take care of closure items.  These are all nonterminals C which
446   have a derivation S =+=> .C x, where x is nullable *)
447
448		       if dot >= (look_pos rule) then
449		       	  case item
450			  of ITEM{rhsAfter=NONTERM b :: _,...} =>
451			     (case add_nonterm_lookahead(b,state)
452			      of nil => ()
453			       | l => app (fn r => r := Look.union(!r,eop)) l)
454			   | _ => ()
455		       else ()
456		    end
457		  in app f l
458		  end
459
460		val iterate = fn l =>
461		   let fun f lookahead (nil,done) = done
462			 | f lookahead (h::t,done) =
463			    let val old = !h
464			    in h := Look.union (old,lookahead);
465			       if (length (!h)) <> (length old)
466					 then f lookahead (t,false)
467					 else f lookahead(t,done)
468			    end
469		       fun g ((from,to)::rest,done) =
470			let val new_done = f (!from) (to,done)
471			in g (rest,new_done)
472			end
473			 | g (nil,done) = done
474		       fun loop true = ()
475			 | loop false = loop (g (l,true))
476		   in loop false
477		   end
478
479		val lookahead = List.concat (map scan_core (nodes graph))
480
481(* used to scan the item list of a TMPCORE and remove the items not
482   being reduced *)
483
484		val create_lcore_list =
485			fn ((item as ITEM {rhsAfter=nil,...},ref l),r) =>
486				(item,l) :: r
487			 | (_,r) => r
488
489	in  add_eop(Graph.core graph 0,eop);
490	    iterate lookahead;
491	    map (fn (TMPCORE (l,state)) =>
492		       LCORE (List.foldr create_lcore_list [] l, state)) new_nodes
493	end
494end;
495