1(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi 2 * 3 * $Log$ 4 * Revision 1.1 2006/06/22 07:40:27 michaeln 5 * Add a MoscowML compilable implementation of MLyacc, using the MLton sources 6 * as the base. 7 * 8 * Revision 1.1.1.1 1997/01/14 01:38:05 george 9 * Version 109.24 10 * 11 * Revision 1.3 1996/10/03 03:37:12 jhr 12 * Qualified identifiers that are no-longer top-level (quot, rem, min, max). 13 * 14 * Revision 1.2 1996/02/26 15:02:35 george 15 * print no longer overloaded. 16 * use of makestring has been removed and replaced with Int.toString .. 17 * use of IO replaced with TextIO 18 * 19 * Revision 1.1.1.1 1996/01/31 16:01:45 george 20 * Version 109 21 * 22 *) 23 24functor mkLalr ( structure IntGrammar : INTGRAMMAR 25 structure Core : CORE 26 structure Graph : LRGRAPH 27 structure Look: LOOK 28 sharing Graph.Core = Core 29 sharing Graph.IntGrammar = Core.IntGrammar = 30 Look.IntGrammar = IntGrammar) : LALR_GRAPH = 31 struct 32 open Array List 33 infix 9 sub 34 open IntGrammar.Grammar IntGrammar Core Graph Look 35 structure Graph = Graph 36 structure Core = Core 37 structure Grammar = IntGrammar.Grammar 38 structure IntGrammar = IntGrammar 39 40 datatype tmpcore = TMPCORE of (item * term list ref) list * int 41 datatype lcore = LCORE of (item * term list) list * int 42 43 44 val prLcore = 45 fn a as (SymbolToString,nontermToString,termToString,print) => 46 let val printItem = prItem (SymbolToString,nontermToString,print) 47 val printLookahead = prLook(termToString,print) 48 in fn (LCORE (items,state)) => 49 (print "\n"; 50 print "state "; 51 print (Int.toString state); 52 print " :\n\n"; 53 List.app (fn (item,lookahead) => 54 (print "{"; 55 printItem item; 56 print ","; 57 printLookahead lookahead; 58 print "}\n")) items) 59 end 60 61 exception Lalr of int 62 63 structure ItemList = ListOrdSet 64 (struct 65 type elem = item * term list ref 66 val eq = fn ((a,_),(b,_)) => eqItem(a,b) 67 val gt = fn ((a,_),(b,_)) => gtItem(a,b) 68 end) 69 70 structure NontermSet = ListOrdSet 71 (struct 72 type elem = nonterm 73 val gt = gtNonterm 74 val eq = eqNonterm 75 end) 76 77(* NTL: nonterms with lookahead *) 78 79 structure NTL = RbOrdSet 80 (struct 81 type elem = nonterm * term list 82 val gt = fn ((i,_),(j,_)) => gtNonterm(i,j) 83 val eq = fn ((i,_),(j,_)) => eqNonterm(i,j) 84 end) 85 86 val DEBUG = false 87 88 val addLookahead = fn {graph,nullable,first,eop, 89 rules,produces,nonterms,epsProds, 90 print,termToString,nontermToString} => 91 let 92 93 val eop = Look.make_set eop 94 95 val symbolToString = fn (TERM t) => termToString t 96 | (NONTERM t) => nontermToString t 97 98 val print = if DEBUG then print 99 else fn _ => () 100 101 val prLook = if DEBUG then prLook (termToString,print) 102 else fn _ => () 103 104 val prNonterm = print o nontermToString 105 106 val prRule = if DEBUG 107 then prRule(symbolToString,nontermToString,print) 108 else fn _ => () 109 110 val printInt = print o (Int.toString : int -> string) 111 112 val printItem = prItem(symbolToString,nontermToString,print) 113 114(* look_pos: position in the rhs of a rule at which we should start placing 115 lookahead ref cells, i.e. the minimum place at which A -> x .B y, where 116 B is a nonterminal and y =*=> epsilon, or A -> x. is true. Positions are 117 given by the number of symbols before the place. The place before the first 118 symbol is 0, etc. *) 119 120 val look_pos = 121 let val positions = array(length rules,0) 122 123(* rule_pos: calculate place in the rhs of a rule at which we should start 124 placing lookahead ref cells *) 125 126 val rule_pos = fn (RULE {rhs,...}) => 127 case (rev rhs) 128 of nil => 0 129 | (TERM t) :: r => length rhs 130 | (l as (NONTERM n) :: r) => 131 132 (* f assumes that everything after n in the 133 rule has proven to be nullable so far. 134 Remember that the rhs has been reversed, 135 implying that this is true initially *) 136 137 (* A -> .z t B y, where y is nullable *) 138 139 let fun f (NONTERM b :: (r as (TERM _ :: _))) = 140 (length r) 141 142 (* A -> .z B C y *) 143 144 | f (NONTERM c :: (r as (NONTERM b :: _))) = 145 if nullable c then f r 146 else (length r) 147 148 (* A -> .B y, where y is nullable *) 149 150 | f (NONTERM b :: nil) = 0 151 | f _ = raise Fail "f" 152 in f l 153 end 154 155 val check_rule = fn (rule as RULE {num,...}) => 156 let val pos = rule_pos rule 157 in (print "look_pos: "; 158 prRule rule; 159 print " = "; 160 printInt pos; 161 print "\n"; 162 update(positions,num,rule_pos rule)) 163 end 164 in app check_rule rules; 165 fn RULE{num,...} => (positions sub num) 166 end 167 168(* rest_is_null: true for items of the form A -> x .B y, where y is nullable *) 169 170 val rest_is_null = 171 fn (ITEM{rule,dot, rhsAfter=NONTERM _ :: _}) => 172 dot >= (look_pos rule) 173 | _ => false 174 175(* map core to a new core including only items of the form A -> x. or 176 A -> x. B y, where y =*=> epsilon. It also adds epsilon productions to the 177 core. Each item is given a ref cell to hold the lookahead nonterminals for 178 it.*) 179 180 val map_core = 181 let val f = fn (item as ITEM {rhsAfter=nil,...},r) => 182 (item,ref nil) :: r 183 | (item,r) => 184 if (rest_is_null item) 185 then (item,ref nil)::r 186 else r 187 in fn (c as CORE (items,state)) => 188 let val epsItems = 189 map (fn rule=>(ITEM{rule=rule,dot=0,rhsAfter=nil}, 190 ref (nil : term list)) 191 ) (epsProds c) 192 in TMPCORE(ItemList.union(List.foldr f [] items,epsItems),state) 193 end 194 end 195 196 val new_nodes = map map_core (nodes graph) 197 198 exception Find 199 200(* findRef: state * item -> lookahead ref cell for item *) 201 202 val findRef = 203 let val states = Array.fromList new_nodes 204 val dummy = ref nil 205 in fn (state,item) => 206 let val TMPCORE (l,_) = states sub state 207 in case ItemList.find((item,dummy),l) 208 of SOME (_,look_ref) => look_ref 209 | NONE => (print "find failed: state "; 210 printInt state; 211 print "\nitem =\n"; 212 printItem item; 213 print "\nactual items =\n"; 214 app (fn (i,_) => (printItem i; 215 print "\n")) l; 216 raise Find) 217 end 218 end 219 220 221(* findRuleRefs: state -> rule -> lookahead refs for rule. *) 222 223 val findRuleRefs = 224 let val shift = shift graph 225 in fn state => 226 (* handle epsilon productions *) 227 fn (rule as RULE {rhs=nil,...}) => 228 [findRef(state,ITEM{rule=rule,dot=0,rhsAfter=nil})] 229 | (rule as RULE {rhs=sym::rest,...}) => 230 let val pos = Int.max(look_pos rule,1) 231 fun scan'(state,nil,pos,result) = 232 findRef(state,ITEM{rule=rule, 233 dot=pos, 234 rhsAfter=nil}) :: result 235 | scan'(state,rhs as sym::rest,pos,result) = 236 scan'(shift(state,sym), rest, pos+1, 237 findRef(state,ITEM{rule=rule, 238 dot=pos, 239 rhsAfter=rhs})::result) 240 241(* find first item of the form A -> x .B y, where y =*=> epsilon and 242 x is not epsilon, or A -> x. use scan' to pick up all refs after this 243 point *) 244 245 fun scan(state,nil,_) = 246 [findRef(state,ITEM{rule=rule,dot=pos,rhsAfter=nil})] 247 | scan(state,rhs,0) = scan'(state,rhs,pos,nil) 248 | scan(state,sym::rest,place) = 249 scan(shift(state,sym),rest,place-1) 250 251 in scan(shift(state,sym),rest,pos-1) 252 end 253 254 end 255 256(* function to compute for some nonterminal n the set of nonterminals A added 257 through the closure of nonterminal n such that n =c*=> .A x, where x is 258 nullable *) 259 260 val nonterms_w_null = fn nt => 261 let val collect_nonterms = fn n => 262 List.foldr (fn (rule as RULE {rhs=rhs as NONTERM n :: _,...},r) => 263 (case 264 (rest_is_null(ITEM {dot=0,rhsAfter=rhs,rule=rule})) 265 of true => n :: r 266 | false => r) 267 | (_,r) => r) [] (produces n) 268 fun dfs(a as (n,r)) = 269 if (NontermSet.exists a) then r 270 else List.foldr dfs (NontermSet.insert(n,r)) 271 (collect_nonterms n) 272 in dfs(nt,NontermSet.empty) 273 end 274 275 val nonterms_w_null = 276 let val data = array(nonterms,NontermSet.empty) 277 fun f n = if n=nonterms then () 278 else (update(data,n,nonterms_w_null (NT n)); 279 f (n+1)) 280 in (f 0; fn (NT nt) => data sub nt) 281 end 282 283(* look_info: for some nonterminal n the set of nonterms A added 284 through the closure of the nonterminal such that n =c+=> .Ax and the 285 lookahead accumlated for each nonterm A *) 286 287 val look_info = fn nt => 288 let val collect_nonterms = fn n => 289 List.foldr (fn (RULE {rhs=NONTERM n :: t,...},r) => 290 (case NTL.find ((n,nil),r) 291 of SOME (key,data) => 292 NTL.insert((n,Look.union(data,first t)),r) 293 | NONE => NTL.insert ((n,first t),r)) 294 | (_,r) => r) 295 NTL.empty (produces n) 296 fun dfs(a as ((key1,data1),r)) = 297 case (NTL.find a) 298 of SOME (_,data2) => 299 NTL.insert((key1,Look.union(data1,data2)),r) 300 | NONE => NTL.fold dfs (collect_nonterms key1) 301 (NTL.insert a) 302 in dfs((nt,nil),NTL.empty) 303 end 304 305 val look_info = 306 if not DEBUG then look_info 307 else fn nt => 308 (print "look_info of "; prNonterm nt; print "=\n"; 309 let val info = look_info nt 310 in (NTL.app (fn (nt,lookahead) => 311 (prNonterm nt; print ": "; prLook lookahead; 312 print "\n\n")) info; 313 info) 314 end) 315 316(* prop_look: propagate lookaheads for nonterms added in the closure of a 317 nonterm. Lookaheads must be propagated from each nonterminal m to 318 all nonterminals { n | m =c+=> nx, where x=*=>epsilon} *) 319 320 val prop_look = fn ntl => 321 let val upd_lookhd = fn new_look => fn (nt,r) => 322 case NTL.find ((nt,new_look),r) 323 of SOME (_,old_look) => 324 NTL.insert((nt, Look.union(new_look,old_look)),r) 325 | NONE => raise (Lalr 241) 326 val upd_nonterm = fn ((nt,look),r) => 327 NontermSet.fold (upd_lookhd look) 328 (nonterms_w_null nt) r 329 in NTL.fold upd_nonterm ntl ntl 330 end 331 332 val prop_look = 333 if not DEBUG then prop_look 334 else fn ntl => 335 (print "prop_look =\n"; 336 let val info = prop_look ntl 337 in (NTL.app (fn (nt,lookahead) => 338 (prNonterm nt; 339 print ": "; 340 prLook lookahead; 341 print "\n\n")) info; info) 342 end) 343 344(* now put the information from these functions together. Create a function 345 which takes a nonterminal n and returns a list of triplets of 346 (a nonterm added through closure, 347 the lookahead for the nonterm, 348 whether the nonterm should include the lookahead for the nonterminal 349 whose closure is being taken (i.e. first(y) for an item j of the 350 form A -> x .n y and lookahead(j) if y =*=> epsilon) 351*) 352 353 val closure_nonterms = 354 let val data = 355 array(nonterms,nil: (nonterm * term list * bool) list) 356 val do_nonterm = fn i => 357 let val nonterms_followed_by_null = 358 nonterms_w_null i 359 val nonterms_added_through_closure = 360 NTL.make_list (prop_look (look_info i)) 361 val result = 362 map (fn (nt,l) => 363 (nt,l,NontermSet.exists (nt,nonterms_followed_by_null)) 364 ) nonterms_added_through_closure 365 in if DEBUG then 366 (print "closure_nonterms = "; 367 prNonterm i; 368 print "\n"; 369 app (fn (nt,look,nullable) => 370 (prNonterm nt; 371 print ":"; 372 prLook look; 373 case nullable 374 of false => print "(false)\n" 375 | true => print "(true)\n")) result; 376 print "\n") 377 else (); 378 result 379 end 380 fun f i = 381 if i=nonterms then () 382 else (update(data,i,do_nonterm (NT i)); f (i+1)) 383 val _ = f 0 384 in fn (NT i) => data sub i 385 end 386 387(* add_nonterm_lookahead: Add lookahead to all completion items for rules added 388 when the closure of a given nonterm in some state is taken. It returns 389 a list of lookahead refs to which the given nonterm's lookahead should 390 be propagated. For each rule, it must trace the shift/gotos in the LR(0) 391 graph to find all items of the form A-> x .B y where y =*=> epsilon or 392 A -> x. 393*) 394 395 val add_nonterm_lookahead = fn (nt,state) => 396 let val f = fn ((nt,lookahead,nullable),r) => 397 let val refs = map (findRuleRefs state) (produces nt) 398 val refs = List.concat refs 399 val _ = app (fn r => 400 r := (Look.union (!r,lookahead))) refs 401 in if nullable then refs @ r else r 402 end 403 in List.foldr f [] (closure_nonterms nt) 404 end 405 406(* scan_core: Scan a core for all items of the form A -> x .B y. Applies 407 add_nonterm_lookahead to each such B, and then merges first(y) into 408 the list of refs returned by add_nonterm_lookahead. It returns 409 a list of ref * ref list for all the items where y =*=> epsilon *) 410 411 val scan_core = fn (CORE (l,state)) => 412 let fun f ((item as ITEM{rhsAfter= NONTERM b :: y, 413 dot,rule})::t,r) = 414 (case (add_nonterm_lookahead(b,state)) 415 of nil => r 416 | l => 417 let val first_y = first y 418 val newr = if dot >= (look_pos rule) 419 then (findRef(state,item),l)::r 420 else r 421 in (app (fn r => 422 r := Look.union(!r,first_y)) l; 423 f (t,newr)) 424 end) 425 | f (_ :: t,r) = f (t,r) 426 | f (nil,r) = r 427 in f (l,nil) 428 end 429 430(* add end-of-parse symbols to set of items consisting of all items 431 immediately derived from the start symbol *) 432 433 val add_eop = fn (c as CORE (l,state),eop) => 434 let fun f (item as ITEM {rule,dot,...}) = 435 let val refs = findRuleRefs state rule 436 in 437 438(* first take care of kernal items. Add the end-of-parse symbols to 439 the lookahead sets for these items. Epsilon productions of the 440 start symbol do not need to be handled specially because they will 441 be in the kernal also *) 442 443 app (fn r => r := Look.union(!r,eop)) refs; 444 445(* now take care of closure items. These are all nonterminals C which 446 have a derivation S =+=> .C x, where x is nullable *) 447 448 if dot >= (look_pos rule) then 449 case item 450 of ITEM{rhsAfter=NONTERM b :: _,...} => 451 (case add_nonterm_lookahead(b,state) 452 of nil => () 453 | l => app (fn r => r := Look.union(!r,eop)) l) 454 | _ => () 455 else () 456 end 457 in app f l 458 end 459 460 val iterate = fn l => 461 let fun f lookahead (nil,done) = done 462 | f lookahead (h::t,done) = 463 let val old = !h 464 in h := Look.union (old,lookahead); 465 if (length (!h)) <> (length old) 466 then f lookahead (t,false) 467 else f lookahead(t,done) 468 end 469 fun g ((from,to)::rest,done) = 470 let val new_done = f (!from) (to,done) 471 in g (rest,new_done) 472 end 473 | g (nil,done) = done 474 fun loop true = () 475 | loop false = loop (g (l,true)) 476 in loop false 477 end 478 479 val lookahead = List.concat (map scan_core (nodes graph)) 480 481(* used to scan the item list of a TMPCORE and remove the items not 482 being reduced *) 483 484 val create_lcore_list = 485 fn ((item as ITEM {rhsAfter=nil,...},ref l),r) => 486 (item,l) :: r 487 | (_,r) => r 488 489 in add_eop(Graph.core graph 0,eop); 490 iterate lookahead; 491 map (fn (TMPCORE (l,state)) => 492 LCORE (List.foldr create_lcore_list [] l, state)) new_nodes 493 end 494end; 495