1(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi 2 * 3 * $Log$ 4 * Revision 1.1 2006/06/22 07:40:27 michaeln 5 * Add a MoscowML compilable implementation of MLyacc, using the MLton sources 6 * as the base. 7 * 8 * Revision 1.1.1.1 1997/01/14 01:38:05 george 9 * Version 109.24 10 * 11 * Revision 1.1.1.1 1996/01/31 16:01:46 george 12 * Version 109 13 * 14 *) 15 16functor mkLook (structure IntGrammar : INTGRAMMAR) : LOOK = 17 struct 18 open Array List 19 infix 9 sub 20 structure Grammar = IntGrammar.Grammar 21 structure IntGrammar = IntGrammar 22 open Grammar IntGrammar 23 24 structure TermSet = ListOrdSet 25 (struct 26 type elem = term 27 val eq = eqTerm 28 val gt = gtTerm 29 end) 30 31 val union = TermSet.union 32 val make_set = TermSet.make_set 33 34 val prLook = fn (termToString,print) => 35 let val printTerm = print o termToString 36 fun f nil = print " " 37 | f (a :: b) = (printTerm a; print " "; f b) 38 in f 39 end 40 41 structure NontermSet = ListOrdSet 42 (struct 43 type elem = nonterm 44 val eq = eqNonterm 45 val gt = gtNonterm 46 end) 47 48 val mkFuncs = fn {rules : rule list, nonterms : int, 49 produces : nonterm -> rule list} => 50 51 let 52 53 (* nullable: create a function which tells if a nonterminal is nullable 54 or not. 55 56 Method: Keep an array of booleans. The nth entry is true if 57 NT i is nullable. If is false if we don't know whether NT i 58 is nullable. 59 60 Keep a list of rules whose remaining rhs we must prove to be 61 null. First, scan the list of rules and remove those rules 62 whose rhs contains a terminal. These rules are not nullable. 63 64 Now iterate through the rules that were left: 65 (1) if there is no remaining rhs we have proved that 66 the rule is nullable, mark the nonterminal for the 67 rule as nullable 68 (2) if the first element of the remaining rhs is 69 nullable, place the rule back on the list with 70 the rest of the rhs 71 (3) if we don't know whether the nonterminal is nullable, 72 place it back on the list 73 (4) repeat until the list does not change. 74 75 We have found all the possible nullable rules. 76 *) 77 78 val nullable = 79 let fun ok_rhs nil = true 80 | ok_rhs ((TERM _)::_) = false 81 | ok_rhs ((NONTERM i)::r) = ok_rhs r 82 fun add_rule (RULE {lhs,rhs,...},r) = 83 if ok_rhs rhs 84 then (lhs, 85 map 86 (fn NONTERM (NT i) => i | _ => raise Fail "add_rule") 87 rhs) :: r 88 else r 89 val items = List.foldr add_rule [] rules 90 val nullable = array(nonterms,false) 91 val f = fn ((NT i,nil),(l,_)) => (update(nullable,i,true); 92 (l,true)) 93 | (a as (lhs,(h::t)),(l,change)) => 94 case (nullable sub h) 95 of false => (a::l,change) 96 | true => ((lhs,t)::l,true) 97 fun prove(l,true) = prove(List.foldr f (nil,false) l) 98 | prove(_,false) = () 99 in (prove(items,true); fn (NT i) => nullable sub i) 100 end 101 102 (* scanRhs : look at a list of symbols, scanning past nullable 103 nonterminals, applying addSymbol to the symbols scanned *) 104 105 fun scanRhs addSymbol = 106 let fun f (nil,result) = result 107 | f ((sym as NONTERM nt) :: rest,result) = 108 if nullable nt then f (rest,addSymbol(sym,result)) 109 else addSymbol(sym,result) 110 | f ((sym as TERM _) :: _,result) = addSymbol(sym,result) 111 in f 112 end 113 114 (* accumulate: look at the start of the right-hand-sides of rules, 115 looking past nullable nonterminals, applying addObj to the visible 116 symbols. *) 117 118 fun accumulate(rules, empty, addObj) = 119 List.foldr (fn (RULE {rhs,...},r) =>(scanRhs addObj) (rhs,r)) empty rules 120 121 val nontermMemo = fn f => 122 let val lookup = array(nonterms,nil) 123 fun g i = if i=nonterms then () 124 else (update(lookup,i,f (NT i)); g (i+1)) 125 in (g 0; fn (NT j) => lookup sub j) 126 end 127 128 (* first1: the FIRST set of a nonterminal in the grammar. Only looks 129 at other terminals, but it is clever enough to move past nullable 130 nonterminals at the start of a production. *) 131 132 fun first1 nt = accumulate(produces nt, TermSet.empty, 133 fn (TERM t, set) => TermSet.insert (t,set) 134 | (_, set) => set) 135 136 val first1 = nontermMemo(first1) 137 138 (* starters1: given a nonterminal "nt", return the set of nonterminals 139 which can start its productions. Looks past nullables, but doesn't 140 recurse *) 141 142 fun starters1 nt = accumulate(produces nt, nil, 143 fn (NONTERM nt, set) => 144 NontermSet.insert(nt,set) 145 | (_, set) => set) 146 147 val starters1 = nontermMemo(starters1) 148 149 (* first: maps a nonterminal to its first-set. Get all the starters of 150 the nonterminal, get the first1 terminal set of each of these, 151 union the whole lot together *) 152 153 fun first nt = 154 List.foldr (fn (a,r) => TermSet.union(r,first1 a)) 155 [] (NontermSet.closure (NontermSet.singleton nt, starters1)) 156 157 val first = nontermMemo(first) 158 159 (* prefix: all possible terminals starting a symbol list *) 160 161 fun prefix symbols = 162 scanRhs (fn (TERM t,r) => TermSet.insert(t,r) 163 | (NONTERM nt,r) => TermSet.union(first nt,r)) 164 (symbols,nil) 165 166 fun nullable_string ((TERM t) :: r) = false 167 | nullable_string ((NONTERM nt) :: r) = 168 (case (nullable nt) 169 of true => nullable_string r 170 | f => f) 171 | nullable_string nil = true 172 173 in {nullable = nullable, first = prefix} 174 end 175end; 176