1structure grammarLib :> grammarLib = 2struct 3 4open HolKernel Parse boolSyntax 5open Abbrev 6 7local open grammarTheory in end 8 9datatype fs = datatype errormonad.fs 10type ('a,'b,'c) m = 'a -> ('a * ('b,'c)fs) 11 12fun return (a : 'a) : ('s,'a,'b)m = fn s => (s,Some a) 13fun ok e = return () e 14fun fail0 (m:string) e = (e, Error m) 15fun (m >- f) : ('s,'b,'error) m = fn (s0:'s) => 16 let 17 val (s1, r) = m s0 18 in 19 case r of 20 Error x => (s1, Error x) 21 | Some v => f v s1 22 end 23fun (m1 >> m2) = m1 >- (fn _ => m2) 24fun mmap f [] = return [] 25 | mmap f (h::t) = f h >- (fn h' => mmap f t >- (fn t' => return (h'::t'))) 26 27infix >-* >>-* <- 28fun (m >-* cf) = m >- (fn v1 => cf v1 >- (fn v2 => return (v1,v2))) 29fun (m1 >>-* m2) = m1 >-* (fn _ => m2) 30fun (m1 <- m2) = m1 >- (fn v => m2 >> return v) 31fun get s = (s, Some s) 32 33type posn = int * int 34type posnmsg = posn * (posn * string) option 35type 'a tt = (posnmsg * term frag list, 'a, string) m 36 37fun ((m1 : 'a tt) ++ (m2 : 'a tt)) : 'a tt = 38 fn (s0 as ((p0, m0), qb0)) => 39 case m1 s0 of 40 (((p, m), qb), Error e) => m2 ((p0, m), qb0) 41 | x => x 42 43fun repeat (m : 'a tt) s = ((m >> repeat m) ++ ok) s 44 45datatype stringt = S of string | TMnm of string 46datatype sym = NT of string | TOK of stringt 47datatype clause = Syms of sym list | TmAQ of term 48type t = (string * clause list) list 49 50datatype NTproblem = Unreachable of string | Undefined of string 51val emptyNTset = HOLset.empty String.compare 52fun clausesNTs (cs, acc) = let 53 fun symcase (NT s, acc) = HOLset.add(acc, s) 54 | symcase (TOK _, acc) = acc 55 fun clausecase (Syms syms, acc) = List.foldl symcase acc syms 56 | clausecase (TmAQ _, acc) = acc 57in 58 List.foldl clausecase acc cs 59end 60fun usedNTs (g:t) = 61 List.foldl (fn ((_,cs), acc) => clausesNTs(cs,acc)) emptyNTset g 62fun reachableNTs start (clauses : t) = 63 let 64 fun foldthis (c as (nt, cs), m) = 65 Binarymap.insert(m,nt,clausesNTs(cs, emptyNTset)) 66 val ntmap = List.foldl foldthis (Binarymap.mkDict String.compare) clauses 67 fun dfs visited tovisit = 68 case tovisit of 69 [] => visited 70 | nt::rest => let 71 val visited' = HOLset.add(visited, nt) 72 val neighbours = case Binarymap.peek(ntmap, nt) of 73 NONE => emptyNTset 74 | SOME s => s 75 fun foldthis (nt, acc) = 76 if HOLset.member(visited', nt) then acc 77 else nt::acc 78 val tovisit' = HOLset.foldl foldthis rest neighbours 79 in 80 dfs visited' tovisit' 81 end 82 in 83 dfs emptyNTset [start] 84 end 85fun definedNTs (clauses : t) = 86 let 87 fun clausescase ((nt, _), acc) = HOLset.add(acc, nt) 88 in 89 List.foldl clausescase emptyNTset clauses 90 end 91 92fun NTproblems start g = 93 let 94 val used = usedNTs g 95 val reachable = reachableNTs start g 96 val defined = definedNTs g 97 val defined_less_reachable = HOLset.difference(defined, reachable) 98 val used_less_defined = HOLset.difference(used, defined) 99 in 100 HOLset.foldl (fn (s, acc) => Unreachable s :: acc) 101 (HOLset.foldl (fn (s, acc) => Undefined s :: acc) 102 [] 103 used_less_defined) 104 defined_less_reachable 105 end 106 107fun newline ((col, line), msg) = ((0, line + 1), msg) 108fun add1col ((col, line), msg) = ((col + 1, line), msg) 109fun advance c p = if c = #"\n" then newline p else add1col p 110fun posn_toString (col, line) = Int.toString line ^ "." ^ Int.toString col 111fun posn_compare(p1 : posn, p2 : posn) = 112 case Int.compare (#2 p1, #2 p2) of 113 EQUAL => Int.compare(#1 p1, #1 p2) 114 | x => x 115val startposn :posnmsg = ((0, 1), NONE) 116 117fun fail s ((p0,m0), i) = 118 let 119 val msg0 = posn_toString p0 ^ ": " ^ s 120 fun finish m = fail0 msg0 ((p0, SOME m), i) 121 in 122 case m0 of 123 NONE => finish (p0,s) 124 | SOME(m as (p2, _)) => 125 if posn_compare(p0,p2) <> LESS then finish (p0,msg0) else finish m 126 end 127 128fun setPosn (line,col) ((_, m), i) = ((((col,line), m), i), Some ()) 129 130fun aq0 error (st as (posn:posnmsg, frags)) = 131 case frags of 132 [] => (st, Error error) 133 | QUOTE s :: rest => if s = "" then aq0 error (posn, rest) 134 else (st, Error error) 135 | ANTIQUOTE t :: rest => ((posn, rest), Some t) 136 137fun getc error (st as (posn, i0)) = 138 case i0 of 139 [] => fail error st 140 | QUOTE s :: rest => 141 if s = "" then getc error (posn, rest) 142 else let val i' = if size s = 1 then rest 143 else QUOTE (String.extract(s,1,NONE)) :: rest 144 val c = String.sub(s,0) 145 val posn' = advance c posn 146 in 147 ((posn', i'), Some (String.sub(s,0))) 148 end 149 | ANTIQUOTE _ :: _ => fail error st 150 151fun dropP P = repeat (getc "" >- (fn c => if P c then ok 152 else fail "")) 153 154fun getP P = 155 getc "getP: EOF" >- 156 (fn c => if P c then return c else fail "getP: P false") 157 158fun token0 s = let 159 val sz = size s 160 fun recurse i = 161 if i = sz then ok 162 else let 163 val c = String.sub(s,i) 164 in 165 getc ("EOF while looking for "^str c^" of "^s) >- 166 (fn c' => if c' = c then recurse (i + 1) 167 else fail ("token: didn't find "^str c^" of "^s)) 168 end 169in 170 recurse 0 171end 172 173fun mnot m s = let 174 val (s',res) = m s 175in 176 case res of 177 Some _ => fail "mnot" s 178 | Error _ => ok s 179end 180 181(* needs to be eta-expanded to make it suitably polymorphic 182 (ware the value restriction!) 183*) 184fun barecomment s = 185 (token0 "(*" >> repeat (mnot (token0 "*)") >> getc "") >> token0 "*)") s 186 187fun mrpt m = 188 let 189 fun recurse acc = 190 (m >- (fn v => recurse (v::acc))) ++ return (List.rev acc) 191 in 192 recurse [] 193 end 194 195fun mrpt1 m = 196 (m >>-* mrpt m) >- (fn (x,xs) => return (x::xs)) 197 198val int = mrpt1 (getP Char.isDigit) >- 199 (fn clist => 200 return (valOf (Int.fromString (String.implode clist)))) 201 202val posn_comment = 203 token0 "(*#loc " >> (int >>-* (getP Char.isSpace >> int)) >- setPosn >> 204 token0 "*)" 205 206val comment = posn_comment ++ barecomment 207 208 209fun lex m = repeat ((getP Char.isSpace >> ok) ++ comment) >> m 210fun complete m = m <- repeat ((getP Char.isSpace >> ok) ++ comment) 211fun token s = lex (token0 s) 212fun aq s = lex (aq0 s) 213 214fun ident_constituent c = 215 Char.isAlphaNum c orelse c = #"'" orelse c = #"_" 216 217fun ident0 s = 218 (getP Char.isAlpha >- 219 (fn c => mrpt (getP ident_constituent) >- 220 (fn cs => return (String.implode (c::cs))))) s 221 222fun ident s = lex ident0 s 223 224fun escape #"n" = #"\n" 225 | escape #"t" = #"\t" 226 | escape c = c 227 228fun slitchar s = 229 ((getP (equal #"\\") >> getc "" >- (return o escape)) ++ 230 (getP (not o equal #"\""))) s 231 232val slitcontent : string tt = 233 mrpt slitchar >- (return o String.implode) 234 235val slit0 : string tt = 236 getP (equal #"\"") >> 237 slitcontent >- 238 (fn s => (getP (equal #"\"") >> return s) ++ 239 fail "Unterminated string literal") 240 241val slit = lex slit0 242 243fun sepby m sep = 244 (m >>-* mrpt (sep >> m)) >- (return o op::) 245 246(* clause = a sequence of tokens and non-terminals 247 (one alternative of a set of productions) 248 OR 249 an antiquoted term *) 250val clause = 251 (aq "" >- (return o TmAQ)) ++ 252 (mrpt ((ident >- (return o NT)) ++ 253 (slit >- (return o TOK o S)) ++ 254 ((token "<" >> ident <- token ">") >- (return o TOK o TMnm))) >- 255 (return o Syms)) 256 257 258(* rulebody = a set of clauses, together specifying all possible 259 expansions for a non-terminal. *) 260val rulebody = 261 sepby clause (token "|") <- token ";" 262 263val grule = ident >>-* (token "::=" >> rulebody) 264val grammar0 = 265 complete (mrpt grule) <- 266 (mnot (getc "") ++ 267 fail "Couldn't make sense of remaining input") 268 269fun grammar fs = 270 case grammar0 (startposn, fs) of 271 (((_,SOME(p,m)), _), Error _) => raise Fail m 272 | (((_, NONE), _), Error s) => 273 raise Fail ("Invariant failure, (and "^s^")") 274 | (_, Some v) => v 275 276fun allnts f (g: t) : string HOLset.set = let 277 fun clausents acc [] = acc 278 | clausents acc (TOK _ :: rest) = clausents acc rest 279 | clausents acc (NT s :: rest) = clausents (HOLset.add(acc,f s)) rest 280 fun bodynts acc [] = acc 281 | bodynts acc (Syms h::t) = bodynts (clausents acc h) t 282 | bodynts acc (TmAQ _ :: t) = bodynts acc t 283 fun rulents acc (nt,b) = bodynts (HOLset.add(acc,f nt)) b 284 fun recurse acc [] = acc 285 | recurse acc (h::t) = recurse (rulents acc h) t 286in 287 recurse (HOLset.empty String.compare) g 288end 289 290type ginfo = {tokmap : string -> term, start : string, 291 nt_tyname : string, tokty : hol_type, 292 mkntname : string -> string, gname : string} 293 294fun mk_symty (tokty, nty) = 295 mk_thy_type {Thy = "grammar", Tyop = "symbol", 296 Args = [tokty, nty]} 297fun mk_infty ty = sumSyntax.mk_sum(ty, numSyntax.num) 298 299val TOK_t = 300 mk_thy_const{Thy = "grammar", Name = "TOK", 301 Ty = alpha --> mk_symty(alpha,beta)} 302val NT_t = 303 mk_thy_const{Thy = "grammar", Name = "NT", 304 Ty = mk_infty alpha --> mk_symty(beta,alpha)} 305 306fun injinf tm = sumSyntax.mk_inl(tm, numSyntax.num) 307 308fun appletter ty = 309 case total dest_thy_type ty of 310 SOME {Tyop,...} => str (String.sub(Tyop,0)) 311 | NONE => str (String.sub(dest_vartype ty,1)) 312 313fun mkNT0 nty mkntname s = injinf (mk_const(mkntname s, nty)) 314fun mkNT nty ({tokty,mkntname,...}:ginfo) s = 315 mk_comb(inst [alpha |-> nty, beta |-> tokty] NT_t, mkNT0 nty mkntname s) 316 317 318fun sym_to_term nty (gi as {tokmap,tokty,mkntname,...}:ginfo) sym used = let 319 val TOK_t' = inst [alpha |-> tokty, beta |-> nty] TOK_t 320 fun mktok t = mk_comb(TOK_t', t) 321 fun termsym_to_term t = let 322 val (_, ty) = dest_const t handle HOL_ERR _ => 323 raise mk_HOL_ERR "grammarLib" "mk_grammar_def" 324 ("Term " ^ Lib.quote (term_to_string t) ^ 325 " is not a constant") 326 val (args, r) = strip_fun ty 327 fun var aty used = let 328 val t = variant used (mk_var(appletter aty, aty)) 329 in 330 (t::used, Some t) 331 end 332 val (used, vs) = 333 case mmap var args used of 334 (_, Error _) => raise Fail "Impossible" 335 | (used, Some vs) => (used, vs) 336 in 337 (used, Some (mktok (list_mk_comb(t, vs)))) 338 end 339in 340 case sym of 341 TOK (S s) => (used, Some (mktok (tokmap s))) 342 | TOK (TMnm n) => termsym_to_term (Parse.Term [QUOTE n]) 343 | NT n => (used, Some (mkNT nty gi n)) 344end 345 346fun image f destty t = let 347 open pred_setSyntax pairSyntax 348 fun err() = mk_HOL_ERR "grammarLib" "mk_grammar_def" 349 ("Can't handle form of antiquoted term " ^ 350 term_to_string t) 351in 352 if same_const t empty_tm then inst [alpha |-> destty] empty_tm 353 else 354 case Lib.total dest_insert t of 355 SOME (e, rest) => mk_insert(f e, image f destty rest) 356 | NONE => 357 let 358 in 359 case Lib.total dest_union t of 360 SOME(s1,s2) => mk_union(image f destty s1, image f destty s2) 361 | NONE => 362 let 363 in 364 case Lib.total dest_comb t of 365 SOME (g,x) => if same_const g gspec_tm then 366 let val (v, body) = dest_pabs x 367 val (e, cond) = dest_pair body 368 in 369 mk_icomb(gspec_tm, 370 mk_pabs(v,mk_pair(f e, cond))) 371 end 372 else raise err() 373 | NONE => raise err() 374 end 375 end 376end 377 378fun clause_to_termSet nty (gi as {tokty,...}:ginfo) c = let 379 val symty = mk_symty(tokty,nty) 380 open pred_setSyntax pairSyntax listSyntax 381in 382 case c of 383 Syms slist => 384 let 385 val (used, ts) = 386 case mmap (sym_to_term nty gi) slist [] of 387 (used, Some ts) => (used, ts) 388 | _ => raise Fail "Can't happen" 389 val body = mk_list(ts, symty) 390 in 391 case used of 392 [] => mk_insert(body, inst [alpha |-> type_of body] empty_tm) 393 | _ => mk_icomb(gspec_tm, mk_pabs(list_mk_pair used, mk_pair(body, T))) 394 end 395 | TmAQ t => 396 let 397 val TOK_t' = inst [alpha |-> tokty, beta |-> nty] TOK_t 398 val symlist_ty = mk_list_type symty 399 in 400 image (fn t => mk_list([mk_comb(TOK_t', t)], symty)) symlist_ty t 401 end 402end 403 404 405val warn = HOL_WARNING "grammarLib" "grammar" 406fun NTproblem_warn (Unreachable s) = warn ("Unused non-terminal: " ^ s) 407 | NTproblem_warn (Undefined s) = warn ("Undefined non-terminal: " ^ s) 408fun reportNTproblems top g = List.app NTproblem_warn (NTproblems top g) 409 410fun mk_grammar_def0 (gi:ginfo) (g:t) = let 411 val {tokmap,nt_tyname,mkntname,tokty,start,gname,...} = gi 412 val _ = reportNTproblems start g 413 val nt_names = allnts mkntname g 414 val constructors = 415 ParseDatatype.Constructors 416 (HOLset.foldl (fn (nm,l) => (nm,[]) :: l) [] nt_names) 417 val _ = Datatype.astHol_datatype [(nt_tyname,constructors)] 418 val nty = mk_thy_type { Thy = current_theory(), 419 Tyop = nt_tyname, Args = []} 420 val U = list_mk_lbinop (curry pred_setSyntax.mk_union) o List.concat 421 val symty = mk_symty(tokty,nty) 422 val gty = mk_thy_type {Thy = "grammar", Tyop = "grammar", Args = [tokty,nty]} 423 fun foldthis ((ntnm, cs), rules_fm) = let 424 val nt_t = mkNT0 nty (#mkntname gi) ntnm 425 val rhs_sets = map (clause_to_termSet nty gi) cs 426 val rhs_set = list_mk_lbinop (curry pred_setSyntax.mk_union) rhs_sets 427 in 428 finite_mapSyntax.mk_fupdate(rules_fm, pairSyntax.mk_pair(nt_t, rhs_set)) 429 end 430 431 val rules = List.foldl foldthis 432 (finite_mapSyntax.mk_fempty 433 (mk_infty nty, listSyntax.mk_list_type symty --> bool)) 434 g 435 val grammar_t = 436 TypeBase.mk_record (gty, [("start", mkNT0 nty mkntname start), 437 ("rules", rules)]) 438 val mesg = with_flag(MESG_to_string, Lib.I) HOL_MESG 439 val defn_name = gname ^ "_def" 440in 441 new_definition (defn_name, mk_eq(mk_var(gname, gty), grammar_t)) before 442 mesg ((if !Globals.interactive then 443 "Grammar definition has been stored under " 444 else 445 "Saved definition __ ") ^ 446 Lib.quote defn_name ^ "\n") 447end 448 449fun mk_grammar_def r q = mk_grammar_def0 r (grammar q) 450 451 452(* 453local open stringTheory in end 454val _ = Hol_datatype`tok = LparT | RparT | StarT | PlusT | Number of num | Ident of string` 455val Number = ``Number`` 456val tmap = 457 List.foldl (fn ((nm,t),m) => Binarymap.insert(m,nm,t)) 458 (Binarymap.mkDict String.compare) 459 [("+", ``PlusT``), ("*", ``StarT``), ("(", ``LparT``), 460 (")", ``RparT``)]; 461mk_grammar_def {tokmap = (fn s => valOf (Binarymap.peek(tmap,s))), 462 nt_tyname = "nt", tokty = ``:tok``, gname = "exprG", 463 mkntname = (fn s => "n" ^ s), start = "E"} 464 `E ::= E "*" F' | F'; 465 F' ::= F' "+" T | T; 466 T ::= <Number> | "(" E ")" | ^(``{ Ident s | s <> "" ��� isAlpha (HD s)}``);`; 467 468*) 469 470end 471