1(*--------------------------------------------------------------------------- 2 Parsing datatype specifications 3 4 The grammar we're parsing is: 5 6 G ::= id "=" <form> 7 form ::= <phrase> ( "|" <phrase> ) * | <record_defn> 8 phrase ::= id | id "of" <under_constr> 9 under_constr ::= <ptype> ( "=>" <ptype> ) * | <record_defn> 10 record_defn ::= "<|" <idtype_pairs> "|>" 11 idtype_pairs ::= id ":" <type> | id : <type> ";" <idtype_pairs> 12 ptype ::= <type> | "(" <type> ")" 13 14 It had better be the case that => is not a type infix. This is true of 15 the standard HOL distribution. In the event that => is an infix, this 16 code will still work as long as the input puts the types in parentheses. 17 ---------------------------------------------------------------------------*) 18 19structure ParseDatatype :> ParseDatatype = 20struct 21 22open Feedback 23val ERR = mk_HOL_ERR "ParseDatatype"; 24val ERRloc = mk_HOL_ERRloc "ParseDatatype"; 25 26open Portable Lib; 27 28open ParseDatatype_dtype 29 30fun pretypeToType pty = 31 case pty of 32 dVartype s => Type.mk_vartype s 33 | dTyop {Tyop = s, Thy, Args} => let 34 in 35 case Thy of 36 NONE => Type.mk_type(s, map pretypeToType Args) 37 | SOME t => Type.mk_thy_type{Tyop = s, Thy = t, 38 Args = map pretypeToType Args} 39 end 40 | dAQ pty => pty 41 42val bads = CharSet.addString(CharSet.empty, "()\"") 43 44fun consume P (qb,s,locn) = let 45 open base_tokens 46 (* know first char of s is OK wrt P *) 47 fun grab i = 48 if i = size s then i 49 else if P (String.sub(s,i)) then grab (i + 1) 50 else i 51 val alphapfx_len = grab 1 52 val pfx = String.substring(s,0,alphapfx_len) 53 val sfx = String.extract(s,alphapfx_len,NONE) 54in 55 if sfx = "" then ((fn () => qbuf.advance qb), pfx, locn) 56 else let 57 val (locn',locn'') = locn.split_at (size pfx) locn 58 in 59 ((fn () => qbuf.replace_current (BT_Ident sfx,locn'') qb), pfx, locn') 60 end 61end 62 63fun consume_n n (qb,s,locn) = 64 let 65 open base_tokens 66 val pfx = String.substring(s,0,n) 67 val sfx = String.extract(s,n,NONE) 68 in 69 if sfx = "" then ((fn () => qbuf.advance qb), BT_Ident pfx, locn) 70 else 71 let 72 val (locn',locn'') = locn.split_at n locn 73 in 74 ((fn () => qbuf.replace_current (BT_Ident sfx,locn'') qb), 75 BT_Ident pfx, locn') 76 end 77 end 78 79fun okident c = Char.isAlphaNum c orelse c = #"'" orelse c = #"_" 80 81fun ident_munge dollared qb s locn = let 82 val s0 = String.sub(s, 0) 83in 84 if Char.isAlpha s0 then let 85 val (adv, pfx, locn') = consume okident (qb,s,locn) 86 in 87 if pfx <> "of" orelse dollared then (adv(); pfx) 88 else raise ERRloc "ident" locn 89 "Expected an identifier, got (reserved word) \"of\"" 90 end 91 else if s0 = #"$" then 92 (* Allow leading dollar signs as quoting mechanism (for "of", but to 93 also cope with potential user paranoia over names of infix 94 constructors). 95 Note that the base_lexer ensures that only one dollar sign is possible 96 at the head of a BT_Ident string, and that it is followed by a 97 non-empty string *) 98 ident_munge true qb (String.extract(s, 1, NONE)) (locn.move_start 1 locn) 99 else let 100 val s_chars = CharSet.addString(CharSet.empty, s) 101 val overlap = CharSet.intersect(bads, s_chars) 102 in 103 if CharSet.isEmpty overlap then (qbuf.advance qb; s) 104 else raise ERRloc "ident" locn 105 (s ^ " not a valid constructor/field/type name") 106 end 107end 108 109fun ident qb = 110 case qbuf.current qb of 111 (base_tokens.BT_Ident s,locn) => ident_munge false qb s locn 112 | (bt,locn) => raise ERRloc "ident" locn ("Expected an identifier, got "^ 113 base_tokens.toString bt) 114 115 116fun cmem c s = 117 let 118 fun recurse i = 119 i >= 0 andalso (String.sub(s,i) = c orelse recurse (i - 1)) 120 in 121 recurse (size s - 1) 122 end 123 124 125fun pdtok_of qb = let 126 open base_tokens CharSet 127 fun advance () = qbuf.advance qb 128in 129 case qbuf.current qb of 130 (t as BT_Ident s,locn) => 131 let 132 val c0 = String.sub(s, 0) 133 in 134 if Char.isAlpha c0 then 135 let 136 val (adv,idstr,locn') = consume Char.isAlphaNum (qb,s,locn) 137 in 138 (adv,BT_Ident idstr,locn') 139 end 140 else if Char.isDigit c0 then 141 let 142 val (adv,idstr,locn') = consume Char.isDigit (qb,s,locn) 143 in 144 (adv, BT_Ident idstr, locn') 145 end 146 else if String.isPrefix "=>" s then consume_n 2 (qb,s,locn) 147 else if cmem c0 "()[]" then consume_n 1 (qb,s,locn) 148 else if String.isPrefix "<|" s then consume_n 2 (qb,s,locn) 149 else if String.isPrefix "|>" s then consume_n 2 (qb,s,locn) 150 else 151 let 152 fun oksym c = Char.isPunct c andalso c <> #"(" andalso c <> #")" 153 andalso c <> #"'" 154 val (adv,idstr,locn') = consume oksym (qb,s,locn) 155 in 156 (adv,BT_Ident idstr,locn') 157 end 158 end 159 | (t,locn) => (advance, t, locn) 160end; 161 162 163fun scan s qb = let 164 val (adv, t, locn) = pdtok_of qb 165in 166 case t of 167 base_tokens.BT_Ident s' => if s <> s' then 168 raise ERRloc "scan" locn 169 ("Wanted \""^s^"\"; got \""^s'^"\"") 170 else adv() 171 | x => raise ERRloc "scan" locn ("Wanted \""^s^"\"; got \""^ 172 base_tokens.toString x^"\"") 173end 174 175fun qtyop {Tyop, Thy, Locn, Args} = 176 dTyop {Tyop = Tyop, Thy = SOME Thy, Args = Args} 177fun tyop ((s,locn), args) = dTyop {Tyop = s, Thy = NONE, Args = args} 178 179fun parse_type G strm = 180 parse_type.parse_type 181 {vartype = dVartype o #1, tyop = tyop, qtyop = qtyop, antiq = dAQ} 182 true 183 G 184 strm 185 186fun parse_atom G strm = 187 parse_type.parse_atom 188 {vartype = dVartype o #1, tyop = tyop, qtyop = qtyop, antiq = dAQ} 189 true 190 G 191 strm 192 193val parse_constructor_id = ident 194 195fun parse_record_fld G qb = let 196 val fldname = ident qb 197 val () = scan ":" qb 198in 199 (fldname, parse_type G qb) 200end 201 202fun sepby1 sepsym p qb = let 203 val i1 = p qb 204 fun recurse acc = 205 case Lib.total (scan sepsym) qb of 206 NONE => List.rev acc 207 | SOME () => recurse (p qb :: acc) 208in 209 recurse [i1] 210end 211 212fun termsepby1 s term p qb = 213 let 214 val res1 = p qb 215 fun recurse acc = 216 case pdtok_of qb of 217 (adv, base_tokens.BT_Ident t, locn) => 218 if t = s then (adv(); term_or_continue acc) 219 else List.rev acc 220 | _ => List.rev acc 221 and term_or_continue acc = 222 case pdtok_of qb of 223 (_, tok, _) => if tok = term then List.rev acc 224 else recurse (p qb :: acc) 225 in 226 recurse [res1] 227 end 228 229fun parse_record_defn G qb = let 230 val () = scan "<|" qb 231 val result = 232 termsepby1 ";" (base_tokens.BT_Ident "|>") (parse_record_fld G) qb 233 val () = scan "|>" qb 234in 235 result 236end 237 238fun parse_phrase G qb = let 239 val constr_id = parse_constructor_id qb 240in 241 case pdtok_of qb of 242 (_,base_tokens.BT_Ident "of",_) => let 243 val _ = qbuf.advance qb 244 val optargs = sepby1 "=>" (parse_type G) qb 245 in 246 (constr_id, optargs) 247 end 248 | _ => (constr_id, []) 249end 250 251fun optscan tok qb = 252 case qbuf.current qb of 253 (tok',_) => if tok = tok' then (qbuf.advance qb; qb) 254 else qb 255 256fun fragtoString (QUOTE s) = s 257 | fragtoString (ANTIQUOTE _) = " ^... " 258 259fun quotetoString [] = "" 260 | quotetoString (x::xs) = fragtoString x ^ quotetoString xs 261 262fun parse_harg G qb = 263 case qbuf.current qb of 264 (base_tokens.BT_Ident s, _) => 265 if String.sub(s,0) = #"(" then 266 let 267 val (adv,_,_) = pdtok_of qb 268 val _ = adv() 269 in 270 parse_type G qb before scan ")" qb 271 end 272 else 273 (parse_atom G qb 274 handle HOL_ERR {origin_structure = "Parse", message, ...} => 275 raise ERR "parse_harg" message) 276 | (base_tokens.BT_AQ ty, _) => (qbuf.advance qb; dAQ ty) 277 | (_, locn) => raise ERRloc "parse_harg" locn 278 "Unexpected token in constructor's argument" 279 280fun parse_hargs G qb = 281 case pdtok_of qb of 282 (_, base_tokens.BT_Ident "|", _) => [] 283 | (_, base_tokens.BT_Ident ";", _) => [] 284 | (_, base_tokens.BT_EOI, _) => [] 285 | _ => let 286 val arg = parse_harg G qb 287 val args = parse_hargs G qb 288 in 289 arg::args 290 end 291 292fun parse_hphrase G qb = let 293 val constr_id = parse_constructor_id qb 294in 295 (constr_id, parse_hargs G qb) 296end 297 298fun parse_form G phrase_p qb = 299 case pdtok_of qb of 300 (_,base_tokens.BT_Ident "<|",_) => Record (parse_record_defn G qb) 301 | _ => Constructors (qb |> optscan (base_tokens.BT_Ident "|") 302 |> sepby1 "|" (phrase_p G)) 303 304fun extract_tynames q = 305 let 306 val qb = qbuf.new_buffer q 307 fun recurse delims acc = 308 case pdtok_of qb of 309 (adv,t as base_tokens.BT_Ident ";",loc) => 310 if null delims then (adv(); next_decl acc) 311 else (adv(); recurse delims acc) 312 | (_,base_tokens.BT_EOI, _) => 313 if null delims then List.rev acc 314 else raise ERR "parse_HG" 315 ("looking for delimiter match ("^ hd delims^ 316 ") but came to end of input") 317 | (adv,t as base_tokens.BT_Ident "(",locn) => 318 (adv(); recurse (")" :: delims) acc) 319 | (adv,t as base_tokens.BT_Ident "<|", locn) => 320 (adv(); recurse ("|>" :: delims) acc) 321 | (adv,t as base_tokens.BT_Ident "[", locn) => 322 (adv(); recurse ("]" :: delims) acc) 323 | (adv, t as base_tokens.BT_Ident s, locn) => 324 (case delims of 325 [] => (adv(); recurse delims acc) 326 | d::ds => if d = s then (adv(); recurse ds acc) 327 else (adv(); recurse delims acc)) 328 | (adv, tok, locn) => (adv(); recurse delims acc) 329 and next_decl acc = 330 case pdtok_of qb of 331 (_, base_tokens.BT_EOI, _) => List.rev acc 332 | _ => 333 let 334 val tyname = ident qb 335 val () = scan "=" qb 336 in 337 recurse [] (tyname :: acc) 338 end 339 in 340 next_decl [] 341 end 342 343fun parse_g G phrase_p qb = let 344 val tyname = ident qb 345 val () = scan "=" qb 346in 347 (tyname, parse_form G phrase_p qb) 348end 349 350fun hide_tynames q G0 = 351 List.foldl (uncurry type_grammar.hide_tyop) G0 (extract_tynames q) 352 353 354fun core_parse G0 phrase_p q = let 355 val G = hide_tynames q G0 356 val qb = qbuf.new_buffer q 357 val result = termsepby1 ";" base_tokens.BT_EOI (parse_g G phrase_p) qb 358in 359 case qbuf.current qb of 360 (base_tokens.BT_EOI,_) => result 361 | (t,locn) => raise ERRloc "parse" locn 362 ("Parse failed looking at "^base_tokens.toString t) 363end 364 365fun parse G0 = core_parse G0 parse_phrase 366fun hparse G0 = core_parse G0 parse_hphrase 367 368 369end 370