1structure grammarLib :> grammarLib =
2struct
3
4open HolKernel Parse boolSyntax
5open Abbrev
6
7local open grammarTheory in end
8
9datatype fs = datatype errormonad.fs
10type ('a,'b,'c) m = 'a -> ('a * ('b,'c)fs)
11
12fun return (a : 'a) : ('s,'a,'b)m = fn s => (s,Some a)
13fun ok e = return () e
14fun fail0 (m:string) e = (e, Error m)
15fun (m >- f) : ('s,'b,'error) m = fn (s0:'s) =>
16  let
17    val (s1, r) = m s0
18  in
19    case r of
20        Error x => (s1, Error x)
21      | Some v => f v s1
22  end
23fun (m1 >> m2) = m1 >- (fn _ => m2)
24fun mmap f [] = return []
25  | mmap f (h::t) = f h >- (fn h' => mmap f t >- (fn t' => return (h'::t')))
26
27infix >-* >>-* <-
28fun (m >-* cf) = m >- (fn v1 => cf v1 >- (fn v2 => return (v1,v2)))
29fun (m1 >>-* m2) = m1 >-* (fn _ => m2)
30fun (m1 <- m2) = m1 >- (fn v => m2 >> return v)
31fun get s = (s, Some s)
32
33type posn = int * int
34type posnmsg = posn * (posn * string) option
35type 'a tt = (posnmsg * term frag list, 'a, string) m
36
37fun ((m1 : 'a tt) ++ (m2 : 'a tt)) : 'a tt =
38  fn (s0 as ((p0, m0), qb0)) =>
39     case m1 s0 of
40         (((p, m), qb), Error e) => m2 ((p0, m), qb0)
41       | x => x
42
43fun repeat (m : 'a tt) s = ((m >> repeat m) ++ ok) s
44
45datatype stringt = S of string | TMnm of string
46datatype sym = NT of string | TOK of stringt
47datatype clause = Syms of sym list | TmAQ of term
48type t = (string * clause list) list
49
50datatype NTproblem = Unreachable of string | Undefined of string
51val emptyNTset = HOLset.empty String.compare
52fun clausesNTs (cs, acc) = let
53  fun symcase (NT s, acc) = HOLset.add(acc, s)
54    | symcase (TOK _, acc) = acc
55  fun clausecase (Syms syms, acc) = List.foldl symcase acc syms
56    | clausecase (TmAQ _, acc) = acc
57in
58  List.foldl clausecase acc cs
59end
60fun usedNTs (g:t) =
61  List.foldl (fn ((_,cs), acc) => clausesNTs(cs,acc)) emptyNTset g
62fun reachableNTs start (clauses : t) =
63  let
64    fun foldthis (c as (nt, cs), m) =
65      Binarymap.insert(m,nt,clausesNTs(cs, emptyNTset))
66    val ntmap = List.foldl foldthis (Binarymap.mkDict String.compare) clauses
67    fun dfs visited tovisit =
68      case tovisit of
69          [] => visited
70        | nt::rest => let
71          val visited' = HOLset.add(visited, nt)
72          val neighbours = case Binarymap.peek(ntmap, nt) of
73                               NONE => emptyNTset
74                             | SOME s => s
75          fun foldthis (nt, acc) =
76            if HOLset.member(visited', nt) then acc
77            else nt::acc
78          val tovisit' = HOLset.foldl foldthis rest neighbours
79        in
80          dfs visited' tovisit'
81        end
82  in
83    dfs emptyNTset [start]
84  end
85fun definedNTs (clauses : t) =
86  let
87    fun clausescase ((nt, _), acc) = HOLset.add(acc, nt)
88  in
89    List.foldl clausescase emptyNTset clauses
90  end
91
92fun NTproblems start g =
93  let
94    val used = usedNTs g
95    val reachable = reachableNTs start g
96    val defined = definedNTs g
97    val defined_less_reachable = HOLset.difference(defined, reachable)
98    val used_less_defined = HOLset.difference(used, defined)
99  in
100    HOLset.foldl (fn (s, acc) => Unreachable s :: acc)
101                 (HOLset.foldl (fn (s, acc) => Undefined s :: acc)
102                               []
103                               used_less_defined)
104                 defined_less_reachable
105  end
106
107fun newline ((col, line), msg) = ((0, line + 1), msg)
108fun add1col ((col, line), msg) = ((col + 1, line), msg)
109fun advance c p = if c = #"\n" then newline p else add1col p
110fun posn_toString (col, line) = Int.toString line ^ "." ^ Int.toString col
111fun posn_compare(p1 : posn, p2 : posn) =
112  case Int.compare (#2 p1, #2 p2) of
113      EQUAL => Int.compare(#1 p1, #1 p2)
114    | x => x
115val startposn :posnmsg = ((0, 1), NONE)
116
117fun fail s ((p0,m0), i) =
118  let
119    val msg0 = posn_toString p0 ^ ": " ^ s
120    fun finish m = fail0 msg0 ((p0, SOME m), i)
121  in
122    case m0 of
123        NONE => finish (p0,s)
124      | SOME(m as (p2, _)) =>
125        if posn_compare(p0,p2) <> LESS then finish (p0,msg0) else finish m
126  end
127
128fun setPosn (line,col) ((_, m), i) = ((((col,line), m), i), Some ())
129
130fun aq0 error (st as (posn:posnmsg, frags)) =
131    case frags of
132      [] => (st, Error error)
133    | QUOTE s :: rest => if s = "" then aq0 error (posn, rest)
134                         else (st, Error error)
135    | ANTIQUOTE t :: rest => ((posn, rest), Some t)
136
137fun getc error (st as (posn, i0)) =
138  case i0 of
139    [] => fail error st
140  | QUOTE s :: rest =>
141    if s = "" then getc error (posn, rest)
142    else let val i' = if size s = 1 then rest
143                      else QUOTE (String.extract(s,1,NONE)) :: rest
144             val c = String.sub(s,0)
145             val posn' = advance c posn
146         in
147           ((posn', i'), Some (String.sub(s,0)))
148         end
149  | ANTIQUOTE _ :: _ => fail error st
150
151fun dropP P = repeat (getc "" >- (fn c => if P c then ok
152                                          else fail ""))
153
154fun getP P =
155  getc "getP: EOF" >-
156  (fn c => if P c then return c else fail "getP: P false")
157
158fun token0 s = let
159  val sz = size s
160  fun recurse i =
161      if i = sz then ok
162      else let
163        val c = String.sub(s,i)
164      in
165        getc ("EOF while looking for "^str c^" of "^s) >-
166        (fn c' => if c' = c then recurse (i + 1)
167                  else fail ("token: didn't find "^str c^" of "^s))
168      end
169in
170  recurse 0
171end
172
173fun mnot m s = let
174  val (s',res) = m s
175in
176  case res of
177    Some _ => fail "mnot" s
178  | Error _ => ok s
179end
180
181(* needs to be eta-expanded to make it suitably polymorphic
182    (ware the value restriction!)
183*)
184fun barecomment s =
185    (token0 "(*" >> repeat (mnot (token0 "*)") >> getc "") >> token0 "*)") s
186
187fun mrpt m =
188  let
189    fun recurse acc =
190      (m >- (fn v => recurse (v::acc))) ++ return (List.rev acc)
191  in
192    recurse []
193  end
194
195fun mrpt1 m =
196  (m >>-* mrpt m) >- (fn (x,xs) => return (x::xs))
197
198val int = mrpt1 (getP Char.isDigit) >-
199          (fn clist =>
200              return (valOf (Int.fromString (String.implode clist))))
201
202val posn_comment =
203    token0 "(*#loc " >> (int >>-* (getP Char.isSpace >> int)) >- setPosn >>
204    token0 "*)"
205
206val comment = posn_comment ++ barecomment
207
208
209fun lex m = repeat ((getP Char.isSpace >> ok) ++ comment) >> m
210fun complete m = m <- repeat ((getP Char.isSpace >> ok) ++ comment)
211fun token s = lex (token0 s)
212fun aq s = lex (aq0 s)
213
214fun ident_constituent c =
215    Char.isAlphaNum c orelse c = #"'" orelse c = #"_"
216
217fun ident0 s =
218  (getP Char.isAlpha >-
219   (fn c => mrpt (getP ident_constituent) >-
220   (fn cs => return (String.implode (c::cs))))) s
221
222fun ident s = lex ident0 s
223
224fun escape #"n" = #"\n"
225  | escape #"t" = #"\t"
226  | escape c = c
227
228fun slitchar s =
229  ((getP (equal #"\\") >> getc "" >- (return o escape)) ++
230   (getP (not o equal #"\""))) s
231
232val slitcontent : string tt =
233  mrpt slitchar >- (return o String.implode)
234
235val slit0 : string tt =
236  getP (equal #"\"") >>
237  slitcontent >-
238  (fn s => (getP (equal #"\"") >> return s) ++
239           fail "Unterminated string literal")
240
241val slit = lex slit0
242
243fun sepby m sep =
244    (m >>-* mrpt (sep >> m)) >- (return o op::)
245
246(* clause = a sequence of tokens and non-terminals
247            (one alternative of a set of productions)
248                  OR
249            an antiquoted term *)
250val clause =
251    (aq "" >- (return o TmAQ)) ++
252    (mrpt ((ident >- (return o NT)) ++
253           (slit >- (return o TOK o S)) ++
254           ((token "<" >> ident <- token ">") >- (return o TOK o TMnm))) >-
255     (return o Syms))
256
257
258(* rulebody = a set of clauses, together specifying all possible
259              expansions for a non-terminal. *)
260val rulebody =
261    sepby clause (token "|") <- token ";"
262
263val grule = ident >>-* (token "::=" >> rulebody)
264val grammar0 =
265    complete (mrpt grule) <-
266     (mnot (getc "") ++
267      fail "Couldn't make sense of remaining input")
268
269fun grammar fs =
270    case grammar0 (startposn, fs) of
271        (((_,SOME(p,m)), _), Error _) => raise Fail m
272      | (((_, NONE), _), Error s) =>
273          raise Fail ("Invariant failure, (and "^s^")")
274      | (_, Some v) => v
275
276fun allnts f (g: t) : string HOLset.set = let
277  fun clausents acc [] = acc
278    | clausents acc (TOK _ :: rest) = clausents acc rest
279    | clausents acc (NT s :: rest) = clausents (HOLset.add(acc,f s)) rest
280  fun bodynts acc [] = acc
281    | bodynts acc (Syms h::t) = bodynts (clausents acc h) t
282    | bodynts acc (TmAQ _ :: t) = bodynts acc t
283  fun rulents acc (nt,b) = bodynts (HOLset.add(acc,f nt)) b
284  fun recurse acc [] = acc
285    | recurse acc (h::t) = recurse (rulents acc h) t
286in
287  recurse (HOLset.empty String.compare) g
288end
289
290type ginfo = {tokmap : string -> term, start : string,
291              nt_tyname : string, tokty : hol_type,
292              mkntname : string -> string, gname : string}
293
294fun mk_symty (tokty, nty) =
295    mk_thy_type {Thy = "grammar", Tyop = "symbol",
296                 Args = [tokty, nty]}
297fun mk_infty ty = sumSyntax.mk_sum(ty, numSyntax.num)
298
299val TOK_t =
300    mk_thy_const{Thy = "grammar", Name = "TOK",
301                 Ty = alpha --> mk_symty(alpha,beta)}
302val NT_t =
303    mk_thy_const{Thy = "grammar", Name = "NT",
304                 Ty = mk_infty alpha --> mk_symty(beta,alpha)}
305
306fun injinf tm = sumSyntax.mk_inl(tm, numSyntax.num)
307
308fun appletter ty =
309    case total dest_thy_type ty of
310        SOME {Tyop,...} => str (String.sub(Tyop,0))
311      | NONE => str (String.sub(dest_vartype ty,1))
312
313fun mkNT0 nty mkntname s = injinf (mk_const(mkntname s, nty))
314fun mkNT nty ({tokty,mkntname,...}:ginfo) s =
315    mk_comb(inst [alpha |-> nty, beta |-> tokty] NT_t, mkNT0 nty mkntname s)
316
317
318fun sym_to_term nty (gi as {tokmap,tokty,mkntname,...}:ginfo) sym used = let
319  val TOK_t' = inst [alpha |-> tokty, beta |-> nty] TOK_t
320  fun mktok t = mk_comb(TOK_t', t)
321  fun termsym_to_term t = let
322    val (_, ty) = dest_const t handle HOL_ERR _ =>
323                  raise mk_HOL_ERR "grammarLib" "mk_grammar_def"
324                        ("Term " ^ Lib.quote (term_to_string t) ^
325                         " is not a constant")
326    val (args, r) = strip_fun ty
327    fun var aty used = let
328      val t = variant used (mk_var(appletter aty, aty))
329    in
330      (t::used, Some t)
331    end
332    val (used, vs) =
333        case mmap var args used of
334            (_, Error _) => raise Fail "Impossible"
335          | (used, Some vs) => (used, vs)
336  in
337    (used, Some (mktok (list_mk_comb(t, vs))))
338  end
339in
340  case sym of
341      TOK (S s) => (used,  Some (mktok (tokmap s)))
342    | TOK (TMnm n) => termsym_to_term (Parse.Term [QUOTE n])
343    | NT n => (used, Some (mkNT nty gi n))
344end
345
346fun image f destty t = let
347  open pred_setSyntax pairSyntax
348  fun err() = mk_HOL_ERR "grammarLib" "mk_grammar_def"
349                         ("Can't handle form of antiquoted term " ^
350                          term_to_string t)
351in
352  if same_const t empty_tm then inst [alpha |-> destty] empty_tm
353  else
354    case Lib.total dest_insert t of
355        SOME (e, rest) => mk_insert(f e, image f destty rest)
356      | NONE =>
357        let
358        in
359          case Lib.total dest_union t of
360              SOME(s1,s2) => mk_union(image f destty s1, image f destty s2)
361            | NONE =>
362              let
363              in
364                case Lib.total dest_comb t of
365                    SOME (g,x) => if same_const g gspec_tm then
366                                    let val (v, body) = dest_pabs x
367                                        val (e, cond) = dest_pair body
368                                    in
369                                      mk_icomb(gspec_tm,
370                                               mk_pabs(v,mk_pair(f e, cond)))
371                                    end
372                                  else raise err()
373                  | NONE => raise err()
374              end
375        end
376end
377
378fun clause_to_termSet nty (gi as {tokty,...}:ginfo) c = let
379  val symty = mk_symty(tokty,nty)
380  open pred_setSyntax pairSyntax listSyntax
381in
382  case c of
383      Syms slist =>
384      let
385        val (used, ts) =
386            case mmap (sym_to_term nty gi) slist [] of
387                (used, Some ts) => (used, ts)
388             |  _ => raise Fail "Can't happen"
389        val body = mk_list(ts, symty)
390      in
391        case used of
392            [] => mk_insert(body, inst [alpha |-> type_of body] empty_tm)
393          | _ => mk_icomb(gspec_tm, mk_pabs(list_mk_pair used, mk_pair(body, T)))
394      end
395    | TmAQ t =>
396      let
397        val TOK_t' = inst [alpha |-> tokty, beta |-> nty] TOK_t
398        val symlist_ty = mk_list_type symty
399      in
400        image (fn t => mk_list([mk_comb(TOK_t', t)], symty)) symlist_ty t
401      end
402end
403
404
405val warn = HOL_WARNING "grammarLib" "grammar"
406fun NTproblem_warn (Unreachable s) = warn ("Unused non-terminal: " ^ s)
407  | NTproblem_warn (Undefined s) = warn ("Undefined non-terminal: " ^ s)
408fun reportNTproblems top g = List.app NTproblem_warn (NTproblems top g)
409
410fun mk_grammar_def0 (gi:ginfo) (g:t) = let
411  val {tokmap,nt_tyname,mkntname,tokty,start,gname,...} = gi
412  val _ = reportNTproblems start g
413  val nt_names = allnts mkntname g
414  val constructors =
415      ParseDatatype.Constructors
416        (HOLset.foldl (fn (nm,l) => (nm,[]) :: l) [] nt_names)
417  val _ = Datatype.astHol_datatype [(nt_tyname,constructors)]
418  val nty = mk_thy_type { Thy = current_theory(),
419                          Tyop = nt_tyname, Args = []}
420  val U = list_mk_lbinop (curry pred_setSyntax.mk_union) o List.concat
421  val symty = mk_symty(tokty,nty)
422  val gty = mk_thy_type {Thy = "grammar", Tyop = "grammar", Args = [tokty,nty]}
423  fun foldthis ((ntnm, cs), rules_fm) = let
424    val nt_t = mkNT0 nty (#mkntname gi) ntnm
425    val rhs_sets = map (clause_to_termSet nty gi) cs
426    val rhs_set = list_mk_lbinop (curry pred_setSyntax.mk_union) rhs_sets
427  in
428    finite_mapSyntax.mk_fupdate(rules_fm, pairSyntax.mk_pair(nt_t, rhs_set))
429  end
430
431  val rules = List.foldl foldthis
432                         (finite_mapSyntax.mk_fempty
433                            (mk_infty nty, listSyntax.mk_list_type symty --> bool))
434                         g
435  val grammar_t =
436      TypeBase.mk_record (gty, [("start", mkNT0 nty mkntname start),
437                                ("rules", rules)])
438  val mesg = with_flag(MESG_to_string, Lib.I) HOL_MESG
439  val defn_name = gname ^ "_def"
440in
441  new_definition (defn_name, mk_eq(mk_var(gname, gty), grammar_t)) before
442  mesg ((if !Globals.interactive then
443           "Grammar definition has been stored under "
444         else
445           "Saved definition __ ") ^
446        Lib.quote defn_name ^ "\n")
447end
448
449fun mk_grammar_def r q = mk_grammar_def0 r (grammar q)
450
451
452(*
453local open stringTheory in end
454val _ = Hol_datatype`tok = LparT | RparT | StarT | PlusT | Number of num | Ident of string`
455val Number = ``Number``
456val tmap =
457  List.foldl (fn ((nm,t),m) => Binarymap.insert(m,nm,t))
458             (Binarymap.mkDict String.compare)
459             [("+", ``PlusT``), ("*", ``StarT``), ("(", ``LparT``),
460              (")", ``RparT``)];
461mk_grammar_def {tokmap = (fn s => valOf (Binarymap.peek(tmap,s))),
462                nt_tyname = "nt", tokty = ``:tok``, gname = "exprG",
463                mkntname = (fn s => "n" ^ s), start = "E"}
464               `E ::= E "*" F' | F';
465                F' ::= F' "+" T | T;
466                T ::= <Number> | "(" E ")" | ^(``{ Ident s | s <> "" ��� isAlpha (HD s)}``);`;
467
468*)
469
470end
471