1(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi 2 * 3 * $Log$ 4 * Revision 1.1 2006/06/22 07:40:27 michaeln 5 * Add a MoscowML compilable implementation of MLyacc, using the MLton sources 6 * as the base. 7 * 8 * Revision 1.1.1.1 1997/01/14 01:38:05 george 9 * Version 109.24 10 * 11 * Revision 1.1.1.1 1996/01/31 16:01:45 george 12 * Version 109 13 * 14 *) 15 16functor mkCoreUtils(structure Core : CORE) : CORE_UTILS = 17 struct 18 open Array List 19 infix 9 sub 20 val DEBUG = true 21 structure Core = Core 22 structure IntGrammar = Core.IntGrammar 23 structure Grammar = IntGrammar.Grammar 24 25 open Grammar IntGrammar Core 26 27 structure Assoc = SymbolAssoc 28 29 structure NtList = ListOrdSet 30 (struct 31 type elem = nonterm 32 val eq = eqNonterm 33 val gt = gtNonterm 34 end) 35 36 val mkFuncs = fn (GRAMMAR {rules,terms,nonterms,...}) => 37 let val derives=array(nonterms,nil : rule list) 38 39(* sort rules by their lhs nonterminal by placing them in an array indexed 40 in their lhs nonterminal *) 41 42 val _ = 43 let val f = fn {lhs=lhs as (NT n), rhs, precedence,rulenum} => 44 let val rule=RULE{lhs=lhs,rhs=rhs,precedence=precedence, 45 rulenum=rulenum,num=0} 46 in update(derives,n,rule::(derives sub n)) 47 end 48 in app f rules 49 end 50 51(* renumber rules so that rule numbers increase monotonically with 52 the number of their lhs nonterminal, and so that rules are numbered 53 sequentially. **Functions below assume that this number is true**, 54 i.e. productions for nonterm i are numbered from j to k, 55 productions for nonterm i+1 are numbered from k+1 to m, and 56 productions for nonterm 0 start at 0 *) 57 58 val _ = 59 let val f = 60 fn (RULE{lhs,rhs,precedence,rulenum,num}, (l,i)) => 61 (RULE{lhs=lhs,rhs=rhs, precedence=precedence, 62 rulenum=rulenum, num=i}::l,i+1) 63 fun g(i,num) = 64 if i<nonterms then 65 let val (l,n) = 66 List.foldr f ([], num) (derives sub i) 67 in update(derives,i,rev l); g(i+1,n) 68 end 69 else () 70 in g(0,0) 71 end 72 73(* list of rules - sorted by rule number. *) 74 75 val rules = 76 let fun g i = 77 if i < nonterms then (derives sub i) @ (g (i+1)) 78 else nil 79 in g 0 80 end 81 82(* produces: set of productions with nonterminal n as the lhs. The set 83 of productions *must* be sorted by rule number, because functions 84 below assume that this list is sorted *) 85 86 val produces = fn (NT n) => 87 if DEBUG andalso (n<0 orelse n>=nonterms) then 88 let exception Produces of int in raise (Produces n) end 89 else derives sub n 90 91 val memoize = fn f => 92 let fun loop i = if i = nonterms then nil 93 else f (NT i) :: (loop (i+1)) 94 val data = Array.fromList(loop 0) 95 in fn (NT i) => data sub i 96 end 97 98 (* compute nonterminals which must be added to a closure when a given 99 nonterminal is added, i.e all nonterminals C for each nonterminal A such 100 that A =*=> Cx *) 101 102 val nontermClosure = 103 let val collectNonterms = fn n => 104 List.foldr (fn (r,l) => 105 case r 106 of RULE {rhs=NONTERM n :: _,...} => 107 NtList.insert(n,l) 108 | _ => l) NtList.empty (produces n) 109 val closureNonterm = fn n => 110 NtList.closure(NtList.singleton n, 111 collectNonterms) 112 in memoize closureNonterm 113 end 114 115(* ntShifts: Take the items produced by a nonterminal, and sort them 116 by their first symbol. For each first symbol, make sure the item 117 list associated with the symbol is sorted also. ** This function 118 assumes that the item list returned by produces is sorted ** 119 120 Create a table of item lists keyed by symbols. Scan the list 121 of items produced by a nonterminal, and insert those with a first 122 symbol on to the beginning of the item list for that symbol, creating 123 a list if necessary. Since produces returns an item list that is 124 already in order, the list for each symbol will also end up in order. 125 *) 126 127 fun sortItems nt = 128 let fun add_item (a as RULE{rhs=symbol::rest,...},r) = 129 let val item = ITEM{rule=a,dot=1,rhsAfter=rest} 130 in Assoc.insert((symbol,case Assoc.find (symbol,r) 131 of SOME l => item::l 132 | NONE => [item]),r) 133 end 134 | add_item (_,r) = r 135 in List.foldr add_item Assoc.empty (produces nt) 136 end 137 138 val ntShifts = memoize sortItems 139 140(* getNonterms: get the nonterminals with a . before them in a core. 141 Returns a list of nonterminals in ascending order *) 142 143 fun getNonterms l = 144 List.foldr (fn (ITEM {rhsAfter=NONTERM sym ::_, ...},r) => 145 NtList.insert(sym,r) 146 | (_,r) => r) [] l 147 148(* closureNonterms: compute the nonterminals that would have a . before them 149 in the closure of the core. Returns a list of nonterminals in ascending 150 order *) 151 fun closureNonterms a = 152 let val nonterms = getNonterms a 153 in List.foldr (fn (nt,r) => 154 NtList.union(nontermClosure nt,r)) 155 nonterms nonterms 156 end 157 158(* shifts: compute the core sets that result from shift/gotoing on 159 the closure of a kernal set. The items in core sets are sorted, of 160 course. 161 162 (1) compute the core sets that result just from items added 163 through the closure operation. 164 (2) then add the shift/gotos on kernal items. 165 166 We can do (1) the following way. Keep a table which for each shift/goto 167symbol gives the list of items that result from shifting or gotoing on the 168symbol. Compute the nonterminals that would have dots before them in the 169closure of the kernal set. For each of these nonterminals, we already have an 170item list in sorted order for each possible shift symbol. Scan the nonterminal 171list from back to front. For each nonterminal, prepend the shift/goto list 172for each shift symbol to the list already in the table. 173 174 We end up with the list of items in correct order for each shift/goto 175symbol. We have kept the item lists in order, scanned the nonterminals from 176back to front (=> that the items end up in ascending order), and never had any 177duplicate items (each item is derived from only one nonterminal). *) 178 179 fun shifts (CORE (itemList,_)) = 180 let 181 182(* mergeShiftItems: add an item list for a shift/goto symbol to the table *) 183 184fun mergeShiftItems (args as ((k,l),r)) = 185 case Assoc.find(k,r) 186 of NONE => Assoc.insert args 187 | SOME old => Assoc.insert ((k,l@old),r) 188 189(* mergeItems: add all items derived from a nonterminal to the table. We've 190 kept these items sorted by their shift/goto symbol (the first symbol on 191 their rhs) *) 192 193 fun mergeItems (n,r) = 194 Assoc.fold mergeShiftItems (ntShifts n) r 195 196(* nonterms: a list of nonterminals that are in a core after the 197 closure operation *) 198 199 val nonterms = closureNonterms itemList 200 201(* now create a table which for each shift/goto symbol gives the sorted list 202 of closure items which would result from first taking all the closure items 203 and then sorting them by the shift/goto symbols *) 204 205 val newsets = List.foldr mergeItems Assoc.empty nonterms 206 207(* finally prepare to insert the kernal items of a core *) 208 209 fun insertItem ((k,i),r) = 210 case (Assoc.find(k,r)) 211 of NONE => Assoc.insert((k,[i]),r) 212 | SOME l => Assoc.insert((k,Core.insert(i,l)),r) 213 fun shiftCores(ITEM{rule,dot,rhsAfter=symbol::rest},r) = 214 insertItem((symbol, 215 ITEM{rule=rule,dot=dot+1,rhsAfter=rest}),r) 216 | shiftCores(_,r) = r 217 218(* insert the kernal items of a core *) 219 220 val newsets = List.foldr shiftCores newsets itemList 221 in Assoc.make_list newsets 222 end 223 224(* nontermEpsProds: returns a list of epsilon productions produced by a 225 nonterminal sorted by rule number. ** Depends on produces returning 226 an ordered list **. It does not alter the order in which the rules 227 were returned by produces; it only removes non-epsilon productions *) 228 229 val nontermEpsProds = 230 let val f = fn nt => 231 List.foldr 232 (fn (rule as RULE {rhs=nil,...},results) => rule :: results 233 | (_,results) => results) 234 [] (produces nt) 235 in memoize f 236 end 237 238(* epsProds: take a core and compute a list of epsilon productions for it 239 sorted by rule number. ** Depends on closureNonterms returning a list 240 of nonterminals sorted by nonterminal #, rule numbers increasing 241 monotonically with their lhs production #, and nontermEpsProds returning 242 an ordered item list for each production 243*) 244 245 fun epsProds (CORE (itemList,state)) = 246 let val prods = map nontermEpsProds (closureNonterms itemList) 247 in List.concat prods 248 end 249 250 in {produces=produces,shifts=shifts,rules=rules,epsProds=epsProds} 251 end 252end; 253