1(*---------------------------------------------------------------------------
2                Parsing datatype specifications
3
4   The grammar we're parsing is:
5
6       G ::=              id "=" <form>
7       form ::=           <phrase> ( "|" <phrase> ) *  |  <record_defn>
8       phrase ::=         id  | id "of" <under_constr>
9       under_constr ::=   <ptype> ( "=>" <ptype> ) * | <record_defn>
10       record_defn ::=    "<|"  <idtype_pairs> "|>"
11       idtype_pairs ::=   id ":" <type> | id : <type> ";" <idtype_pairs>
12       ptype ::=          <type> | "(" <type> ")"
13
14  It had better be the case that => is not a type infix.  This is true of
15  the standard HOL distribution.  In the event that => is an infix, this
16  code will still work as long as the input puts the types in parentheses.
17 ---------------------------------------------------------------------------*)
18
19structure ParseDatatype :> ParseDatatype =
20struct
21
22open Feedback
23val ERR = mk_HOL_ERR "ParseDatatype";
24val ERRloc = mk_HOL_ERRloc "ParseDatatype";
25
26open Portable Lib;
27
28open ParseDatatype_dtype
29
30fun pretypeToType pty =
31  case pty of
32    dVartype s => Type.mk_vartype s
33  | dTyop {Tyop = s, Thy, Args} => let
34    in
35      case Thy of
36        NONE => Type.mk_type(s, map pretypeToType Args)
37      | SOME t => Type.mk_thy_type{Tyop = s, Thy = t,
38                                   Args = map pretypeToType Args}
39    end
40  | dAQ pty => pty
41
42val bads = CharSet.addString(CharSet.empty, "()\"")
43
44fun consume P (qb,s,locn) = let
45  open base_tokens
46  (* know first char of s is OK wrt P *)
47  fun grab i =
48      if i = size s then i
49      else if P (String.sub(s,i)) then grab (i + 1)
50      else i
51  val alphapfx_len = grab 1
52  val pfx = String.substring(s,0,alphapfx_len)
53  val sfx = String.extract(s,alphapfx_len,NONE)
54in
55  if sfx = "" then ((fn () => qbuf.advance qb), pfx, locn)
56  else let
57      val (locn',locn'') = locn.split_at (size pfx) locn
58    in
59      ((fn () => qbuf.replace_current (BT_Ident sfx,locn'') qb), pfx, locn')
60    end
61end
62
63fun consume_n n (qb,s,locn) =
64  let
65    open base_tokens
66    val pfx = String.substring(s,0,n)
67    val sfx = String.extract(s,n,NONE)
68  in
69    if sfx = "" then ((fn () => qbuf.advance qb), BT_Ident pfx, locn)
70    else
71      let
72        val (locn',locn'') = locn.split_at n locn
73      in
74        ((fn () => qbuf.replace_current (BT_Ident sfx,locn'') qb),
75         BT_Ident pfx, locn')
76      end
77  end
78
79fun okident c = Char.isAlphaNum c orelse c = #"'" orelse c = #"_"
80
81fun ident_munge dollared qb s locn = let
82  val s0 = String.sub(s, 0)
83in
84  if Char.isAlpha s0 then let
85      val (adv, pfx, locn') = consume okident (qb,s,locn)
86    in
87      if pfx <> "of" orelse dollared then (adv(); pfx)
88      else raise ERRloc "ident" locn
89                        "Expected an identifier, got (reserved word) \"of\""
90    end
91  else if s0 = #"$" then
92    (* Allow leading dollar signs as quoting mechanism (for "of", but to
93       also cope with potential user paranoia over names of infix
94       constructors).
95       Note that the base_lexer ensures that only one dollar sign is possible
96       at the head of a BT_Ident string, and that it is followed by a
97       non-empty string *)
98    ident_munge true qb (String.extract(s, 1, NONE)) (locn.move_start 1 locn)
99  else let
100      val s_chars = CharSet.addString(CharSet.empty, s)
101      val overlap = CharSet.intersect(bads, s_chars)
102    in
103      if CharSet.isEmpty overlap then (qbuf.advance qb; s)
104      else raise ERRloc "ident" locn
105                        (s ^ " not a valid constructor/field/type name")
106    end
107end
108
109fun ident qb =
110    case qbuf.current qb of
111      (base_tokens.BT_Ident s,locn) => ident_munge false qb s locn
112    | (bt,locn) => raise ERRloc "ident" locn ("Expected an identifier, got "^
113                                              base_tokens.toString bt)
114
115
116fun cmem c s =
117  let
118    fun recurse i =
119      i >= 0 andalso (String.sub(s,i) = c orelse recurse (i - 1))
120  in
121    recurse (size s - 1)
122  end
123
124
125fun pdtok_of qb = let
126  open base_tokens CharSet
127  fun advance () = qbuf.advance qb
128in
129  case qbuf.current qb of
130      (t as BT_Ident s,locn) =>
131      let
132        val c0 = String.sub(s, 0)
133      in
134        if Char.isAlpha c0 then
135          let
136            val (adv,idstr,locn') = consume Char.isAlphaNum (qb,s,locn)
137          in
138            (adv,BT_Ident idstr,locn')
139          end
140        else if Char.isDigit c0 then
141          let
142            val (adv,idstr,locn') = consume Char.isDigit (qb,s,locn)
143          in
144            (adv, BT_Ident idstr, locn')
145          end
146        else if String.isPrefix "=>" s then consume_n 2 (qb,s,locn)
147        else if cmem c0 "()[]" then consume_n 1 (qb,s,locn)
148        else if String.isPrefix "<|" s then consume_n 2 (qb,s,locn)
149        else if String.isPrefix "|>" s then consume_n 2 (qb,s,locn)
150        else
151          let
152            fun oksym c = Char.isPunct c andalso c <> #"(" andalso c <> #")"
153                          andalso c <> #"'"
154            val (adv,idstr,locn') = consume oksym (qb,s,locn)
155          in
156            (adv,BT_Ident idstr,locn')
157          end
158      end
159    | (t,locn) => (advance, t, locn)
160end;
161
162
163fun scan s qb = let
164  val (adv, t, locn) = pdtok_of qb
165in
166  case t of
167    base_tokens.BT_Ident s' => if s <> s' then
168                                 raise ERRloc "scan" locn
169                                           ("Wanted \""^s^"\"; got \""^s'^"\"")
170                               else adv()
171  | x => raise ERRloc "scan" locn ("Wanted \""^s^"\"; got \""^
172                                   base_tokens.toString x^"\"")
173end
174
175fun qtyop {Tyop, Thy, Locn, Args} =
176    dTyop {Tyop = Tyop, Thy = SOME Thy, Args = Args}
177fun tyop ((s,locn), args) = dTyop {Tyop = s, Thy = NONE, Args = args}
178
179fun parse_type G strm =
180  parse_type.parse_type
181    {vartype = dVartype o #1, tyop = tyop, qtyop = qtyop, antiq = dAQ}
182    true
183    G
184    strm
185
186fun parse_atom G strm =
187  parse_type.parse_atom
188    {vartype = dVartype o #1, tyop = tyop, qtyop = qtyop, antiq = dAQ}
189    true
190    G
191    strm
192
193val parse_constructor_id = ident
194
195fun parse_record_fld G qb = let
196  val fldname = ident qb
197  val () = scan ":" qb
198in
199  (fldname, parse_type G qb)
200end
201
202fun sepby1 sepsym p qb = let
203  val i1 = p qb
204  fun recurse acc =
205      case Lib.total (scan sepsym) qb of
206        NONE => List.rev acc
207      | SOME () => recurse (p qb :: acc)
208in
209  recurse [i1]
210end
211
212fun termsepby1 s term p qb =
213  let
214    val res1 = p qb
215    fun recurse acc =
216      case pdtok_of qb of
217          (adv, base_tokens.BT_Ident t, locn) =>
218            if t = s then (adv(); term_or_continue acc)
219            else List.rev acc
220        | _ => List.rev acc
221    and term_or_continue acc =
222      case pdtok_of qb of
223          (_, tok, _) => if tok = term then List.rev acc
224                         else recurse (p qb :: acc)
225  in
226    recurse [res1]
227  end
228
229fun parse_record_defn G qb = let
230  val () = scan "<|" qb
231  val result =
232      termsepby1 ";" (base_tokens.BT_Ident "|>") (parse_record_fld G) qb
233  val () = scan "|>" qb
234in
235  result
236end
237
238fun parse_phrase G qb = let
239  val constr_id = parse_constructor_id qb
240in
241  case pdtok_of qb of
242    (_,base_tokens.BT_Ident "of",_) => let
243      val _ = qbuf.advance qb
244      val optargs = sepby1 "=>" (parse_type G) qb
245    in
246      (constr_id, optargs)
247    end
248  | _ => (constr_id, [])
249end
250
251fun optscan tok qb =
252    case qbuf.current qb of
253        (tok',_) => if tok = tok' then (qbuf.advance qb; qb)
254                    else qb
255
256fun fragtoString (QUOTE s) = s
257  | fragtoString (ANTIQUOTE _) = " ^... "
258
259fun quotetoString [] = ""
260  | quotetoString (x::xs) = fragtoString x ^ quotetoString xs
261
262fun parse_harg G qb =
263  case qbuf.current qb of
264      (base_tokens.BT_Ident s, _) =>
265      if String.sub(s,0) = #"(" then
266        let
267          val (adv,_,_) = pdtok_of qb
268          val _ = adv()
269        in
270          parse_type G qb before scan ")" qb
271        end
272      else
273        (parse_atom G qb
274         handle HOL_ERR {origin_structure = "Parse", message, ...} =>
275                raise ERR "parse_harg" message)
276    | (base_tokens.BT_AQ ty, _) => (qbuf.advance qb; dAQ ty)
277    | (_, locn) => raise ERRloc "parse_harg" locn
278                         "Unexpected token in constructor's argument"
279
280fun parse_hargs G qb =
281  case pdtok_of qb of
282      (_, base_tokens.BT_Ident "|", _) => []
283    | (_, base_tokens.BT_Ident ";", _) => []
284    | (_, base_tokens.BT_EOI, _) => []
285    | _ => let
286      val arg = parse_harg G qb
287      val args = parse_hargs G qb
288    in
289      arg::args
290    end
291
292fun parse_hphrase G qb = let
293  val constr_id = parse_constructor_id qb
294in
295  (constr_id, parse_hargs G qb)
296end
297
298fun parse_form G phrase_p qb =
299    case pdtok_of qb of
300      (_,base_tokens.BT_Ident "<|",_) => Record (parse_record_defn G qb)
301    | _ => Constructors (qb |> optscan (base_tokens.BT_Ident "|")
302                            |> sepby1 "|" (phrase_p G))
303
304fun extract_tynames q =
305  let
306    val qb = qbuf.new_buffer q
307    fun recurse delims acc =
308      case pdtok_of qb of
309          (adv,t as base_tokens.BT_Ident ";",loc) =>
310            if null delims then (adv(); next_decl acc)
311            else (adv(); recurse delims acc)
312        | (_,base_tokens.BT_EOI, _) =>
313            if null delims then List.rev acc
314            else raise ERR "parse_HG"
315                       ("looking for delimiter match ("^ hd delims^
316                        ") but came to end of input")
317        | (adv,t as base_tokens.BT_Ident "(",locn) =>
318            (adv(); recurse (")" :: delims) acc)
319        | (adv,t as base_tokens.BT_Ident "<|", locn) =>
320            (adv(); recurse ("|>" :: delims) acc)
321        | (adv,t as base_tokens.BT_Ident "[", locn) =>
322            (adv(); recurse ("]" :: delims) acc)
323        | (adv, t as base_tokens.BT_Ident s, locn) =>
324            (case delims of
325                 [] => (adv(); recurse delims acc)
326               | d::ds => if d = s then (adv(); recurse ds acc)
327                          else (adv(); recurse delims acc))
328        | (adv, tok, locn) => (adv(); recurse delims acc)
329    and next_decl acc =
330        case pdtok_of qb of
331            (_, base_tokens.BT_EOI, _) => List.rev acc
332          | _ =>
333            let
334              val tyname = ident qb
335              val () = scan "=" qb
336            in
337              recurse [] (tyname :: acc)
338            end
339  in
340    next_decl []
341  end
342
343fun parse_g G phrase_p qb = let
344  val tyname = ident qb
345  val () = scan "=" qb
346in
347  (tyname, parse_form G phrase_p qb)
348end
349
350fun hide_tynames q G0 =
351  List.foldl (uncurry type_grammar.hide_tyop) G0 (extract_tynames q)
352
353
354fun core_parse G0 phrase_p q = let
355  val G = hide_tynames q G0
356  val qb = qbuf.new_buffer q
357  val result = termsepby1 ";" base_tokens.BT_EOI (parse_g G phrase_p) qb
358in
359  case qbuf.current qb of
360      (base_tokens.BT_EOI,_) => result
361    | (t,locn) => raise ERRloc "parse" locn
362                        ("Parse failed looking at "^base_tokens.toString t)
363end
364
365fun parse G0 = core_parse G0 parse_phrase
366fun hparse G0 = core_parse G0 parse_hphrase
367
368
369end
370