(*  Title:      Pure/Isar/token.ML
    Author:     Markus Wenzel, TU Muenchen

Outer token syntax for Isabelle/Isar.
*)

signature TOKEN =
sig
  datatype kind =
    (*immediate source*)
    Command | Keyword | Ident | Long_Ident | Sym_Ident | Var | Type_Ident | Type_Var | Nat |
    Float | Space |
    (*delimited content*)
    String | Alt_String | Verbatim | Cartouche | Comment of Comment.kind option |
    (*special content*)
    Error of string | EOF
  val str_of_kind: kind -> string
  type file = {src_path: Path.T, lines: string list, digest: SHA1.digest, pos: Position.T}
  type T
  type src = T list
  type name_value = {name: string, kind: string, print: Proof.context -> Markup.T * xstring}
  datatype value =
    Source of src |
    Literal of bool * Markup.T |
    Name of name_value * morphism |
    Typ of typ |
    Term of term |
    Fact of string option * thm list |
    Attribute of morphism -> attribute |
    Declaration of declaration |
    Files of file Exn.result list
  val pos_of: T -> Position.T
  val adjust_offsets: (int -> int option) -> T -> T
  val eof: T
  val is_eof: T -> bool
  val not_eof: T -> bool
  val stopper: T Scan.stopper
  val kind_of: T -> kind
  val is_kind: kind -> T -> bool
  val is_command: T -> bool
  val keyword_with: (string -> bool) -> T -> bool
  val is_command_modifier: T -> bool
  val ident_with: (string -> bool) -> T -> bool
  val is_proper: T -> bool
  val is_comment: T -> bool
  val is_informal_comment: T -> bool
  val is_formal_comment: T -> bool
  val is_document_marker: T -> bool
  val is_ignored: T -> bool
  val is_begin_ignore: T -> bool
  val is_end_ignore: T -> bool
  val is_error: T -> bool
  val is_space: T -> bool
  val is_blank: T -> bool
  val is_newline: T -> bool
  val range_of: T list -> Position.range
  val core_range_of: T list -> Position.range
  val content_of: T -> string
  val source_of: T -> string
  val input_of: T -> Input.source
  val inner_syntax_of: T -> string
  val keyword_markup: bool * Markup.T -> string -> Markup.T
  val completion_report: T -> Position.report_text list
  val reports: Keyword.keywords -> T -> Position.report_text list
  val markups: Keyword.keywords -> T -> Markup.T list
  val unparse: T -> string
  val print: T -> string
  val text_of: T -> string * string
  val file_source: file -> Input.source
  val get_files: T -> file Exn.result list
  val put_files: file Exn.result list -> T -> T
  val get_value: T -> value option
  val reports_of_value: T -> Position.report list
  val name_value: name_value -> value
  val get_name: T -> name_value option
  val declare_maxidx: T -> Proof.context -> Proof.context
  val map_facts: (string option -> thm list -> thm list) -> T -> T
  val trim_context_src: src -> src
  val transform: morphism -> T -> T
  val init_assignable: T -> T
  val assign: value option -> T -> T
  val evaluate: ('a -> value) -> (T -> 'a) -> T -> 'a
  val closure: T -> T
  val pretty_value: Proof.context -> T -> Pretty.T
  val name_of_src: src -> string * Position.T
  val args_of_src: src -> T list
  val checked_src: src -> bool
  val check_src: Proof.context -> (Proof.context -> 'a Name_Space.table) -> src -> src * 'a
  val pretty_src: Proof.context -> src -> Pretty.T
  val ident_or_symbolic: string -> bool
  val read_cartouche: Symbol_Pos.T list -> T
  val tokenize: Keyword.keywords -> {strict: bool} -> Symbol_Pos.T list -> T list
  val explode: Keyword.keywords -> Position.T -> string -> T list
  val explode0: Keyword.keywords -> string -> T list
  val print_name: Keyword.keywords -> string -> string
  val make: (int * int) * string -> Position.T -> T * Position.T
  val make_string: string * Position.T -> T
  val make_int: int -> T list
  val make_src: string * Position.T -> T list -> src
  type 'a parser = T list -> 'a * T list
  type 'a context_parser = Context.generic * T list -> 'a * (Context.generic * T list)
  val read_body: Keyword.keywords -> 'a parser -> Symbol_Pos.T list -> 'a option
  val read_antiq: Keyword.keywords -> 'a parser -> Symbol_Pos.T list * Position.T -> 'a
  val syntax_generic: 'a context_parser -> src -> Context.generic -> 'a * Context.generic
  val syntax: 'a context_parser -> src -> Proof.context -> 'a * Proof.context
end;

structure Token: TOKEN =
struct

(** tokens **)

(* token kind *)

datatype kind =
  (*immediate source*)
  Command | Keyword | Ident | Long_Ident | Sym_Ident | Var | Type_Ident | Type_Var | Nat |
  Float | Space |
  (*delimited content*)
  String | Alt_String | Verbatim | Cartouche | Comment of Comment.kind option |
  (*special content*)
  Error of string | EOF;

val str_of_kind =
 fn Command => "command"
  | Keyword => "keyword"
  | Ident => "identifier"
  | Long_Ident => "long identifier"
  | Sym_Ident => "symbolic identifier"
  | Var => "schematic variable"
  | Type_Ident => "type variable"
  | Type_Var => "schematic type variable"
  | Nat => "natural number"
  | Float => "floating-point number"
  | Space => "white space"
  | String => "quoted string"
  | Alt_String => "back-quoted string"
  | Verbatim => "verbatim text"
  | Cartouche => "text cartouche"
  | Comment NONE => "informal comment"
  | Comment (SOME _) => "formal comment"
  | Error _ => "bad input"
  | EOF => "end-of-input";

val immediate_kinds =
  Vector.fromList
    [Command, Keyword, Ident, Long_Ident, Sym_Ident, Var, Type_Ident, Type_Var, Nat, Float, Space];

val delimited_kind =
  (fn String => true
    | Alt_String => true
    | Verbatim => true
    | Cartouche => true
    | Comment _ => true
    | _ => false);


(* datatype token *)

(*The value slot assigns an (optional) internal value to a token,
  usually as a side-effect of special scanner setup (see also
  args.ML).  Note that an assignable ref designates an intermediate
  state of internalization -- it is NOT meant to persist.*)

type file = {src_path: Path.T, lines: string list, digest: SHA1.digest, pos: Position.T};

type name_value = {name: string, kind: string, print: Proof.context -> Markup.T * xstring};

datatype T = Token of (Symbol_Pos.text * Position.range) * (kind * string) * slot

and slot =
  Slot |
  Value of value option |
  Assignable of value option Unsynchronized.ref

and value =
  Source of T list |
  Literal of bool * Markup.T |
  Name of name_value * morphism |
  Typ of typ |
  Term of term |
  Fact of string option * thm list |  (*optional name for dynamic fact, i.e. fact "variable"*)
  Attribute of morphism -> attribute |
  Declaration of declaration |
  Files of file Exn.result list;

type src = T list;


(* position *)

fun pos_of (Token ((_, (pos, _)), _, _)) = pos;
fun end_pos_of (Token ((_, (_, pos)), _, _)) = pos;

fun adjust_offsets adjust (Token ((x, range), y, z)) =
  Token ((x, apply2 (Position.adjust_offsets adjust) range), y, z);


(* stopper *)

fun mk_eof pos = Token (("", (pos, Position.none)), (EOF, ""), Slot);
val eof = mk_eof Position.none;

fun is_eof (Token (_, (EOF, _), _)) = true
  | is_eof _ = false;

val not_eof = not o is_eof;

val stopper =
  Scan.stopper (fn [] => eof | toks => mk_eof (end_pos_of (List.last toks))) is_eof;
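(*Illustrative sketch (not part of this module's behaviour): the stopper makes
  finite scanning over a token list possible with the generic Scan combinators,
  e.g.  Scan.finite stopper (Scan.many not_eof) toks  consumes all tokens and
  leaves an empty remainder.*)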


(* kind of token *)

fun kind_of (Token (_, (k, _), _)) = k;
fun is_kind k (Token (_, (k', _), _)) = k = k';

val is_command = is_kind Command;

fun keyword_with pred (Token (_, (Keyword, x), _)) = pred x
  | keyword_with _ _ = false;

val is_command_modifier = keyword_with (fn x => x = "private" orelse x = "qualified");

fun ident_with pred (Token (_, (Ident, x), _)) = pred x
  | ident_with _ _ = false;

fun is_ignored (Token (_, (Space, _), _)) = true
  | is_ignored (Token (_, (Comment NONE, _), _)) = true
  | is_ignored _ = false;

fun is_proper (Token (_, (Space, _), _)) = false
  | is_proper (Token (_, (Comment _, _), _)) = false
  | is_proper _ = true;

fun is_comment (Token (_, (Comment _, _), _)) = true
  | is_comment _ = false;

fun is_informal_comment (Token (_, (Comment NONE, _), _)) = true
  | is_informal_comment _ = false;

fun is_formal_comment (Token (_, (Comment (SOME _), _), _)) = true
  | is_formal_comment _ = false;

fun is_document_marker (Token (_, (Comment (SOME Comment.Marker), _), _)) = true
  | is_document_marker _ = false;

fun is_begin_ignore (Token (_, (Comment NONE, "<"), _)) = true
  | is_begin_ignore _ = false;

fun is_end_ignore (Token (_, (Comment NONE, ">"), _)) = true
  | is_end_ignore _ = false;

fun is_error (Token (_, (Error _, _), _)) = true
  | is_error _ = false;


(* blanks and newlines -- space tokens obey lines *)

fun is_space (Token (_, (Space, _), _)) = true
  | is_space _ = false;

fun is_blank (Token (_, (Space, x), _)) = not (String.isSuffix "\n" x)
  | is_blank _ = false;

fun is_newline (Token (_, (Space, x), _)) = String.isSuffix "\n" x
  | is_newline _ = false;


(* range of tokens *)

fun range_of (toks as tok :: _) =
      let val pos' = end_pos_of (List.last toks)
      in Position.range (pos_of tok, pos') end
  | range_of [] = Position.no_range;

val core_range_of =
  drop_prefix is_ignored #> drop_suffix is_ignored #> range_of;


(* token content *)

fun content_of (Token (_, (_, x), _)) = x;
fun source_of (Token ((source, _), _, _)) = source;

fun input_of (Token ((source, range), (kind, _), _)) =
  Input.source (delimited_kind kind) source range;

fun inner_syntax_of tok =
  let val x = content_of tok
  in if YXML.detect x then x else Syntax.implode_input (input_of tok) end;


(* markup reports *)

local

val token_kind_markup =
 fn Var => (Markup.var, "")
  | Type_Ident => (Markup.tfree, "")
  | Type_Var => (Markup.tvar, "")
  | String => (Markup.string, "")
  | Alt_String => (Markup.alt_string, "")
  | Verbatim => (Markup.verbatim, "")
  | Cartouche => (Markup.cartouche, "")
  | Comment _ => (Markup.comment, "")
  | Error msg => (Markup.bad (), msg)
  | _ => (Markup.empty, "");

fun keyword_reports tok = map (fn markup => ((pos_of tok, markup), ""));

fun command_markups keywords x =
  if Keyword.is_theory_end keywords x then [Markup.keyword2 |> Markup.keyword_properties]
  else
    (if Keyword.is_proof_asm keywords x then [Markup.keyword3]
     else if Keyword.is_improper keywords x then [Markup.keyword1, Markup.improper]
     else [Markup.keyword1])
    |> map Markup.command_properties;

in

fun keyword_markup (important, keyword) x =
  if important orelse Symbol.is_ascii_identifier x then keyword else Markup.delimiter;

fun completion_report tok =
  if is_kind Keyword tok
  then map (fn m => ((pos_of tok, m), "")) (Completion.suppress_abbrevs (content_of tok))
  else [];

fun reports keywords tok =
  if is_command tok then
    keyword_reports tok (command_markups keywords (content_of tok))
  else if is_kind Keyword tok then
    keyword_reports tok
      [keyword_markup (false, Markup.keyword2 |> Markup.keyword_properties) (content_of tok)]
  else
    let
      val pos = pos_of tok;
      val (m, text) = token_kind_markup (kind_of tok);
      val delete = #2 (Symbol_Pos.explode_delete (source_of tok, pos));
    in ((pos, m), text) :: map (fn p => ((p, Markup.delete), "")) delete end;

fun markups keywords = map (#2 o #1) o reports keywords;

end;


(* unparse *)

fun unparse (Token (_, (kind, x), _)) =
  (case kind of
    String => Symbol_Pos.quote_string_qq x
  | Alt_String => Symbol_Pos.quote_string_bq x
  | Verbatim => enclose "{*" "*}" x
  | Cartouche => cartouche x
  | Comment NONE => enclose "(*" "*)" x
  | EOF => ""
  | _ => x);
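(*Examples (illustrative): unparse restores the delimiters that were stripped
  during scanning, e.g. a String token with content abc unparses as "abc", a
  Verbatim token as {*abc*}, and an informal comment as (*abc*); immediate
  tokens such as Ident or Keyword are returned verbatim.*)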

fun print tok = Markup.markups (markups Keyword.empty_keywords tok) (unparse tok);

fun text_of tok =
  let
    val k = str_of_kind (kind_of tok);
    val ms = markups Keyword.empty_keywords tok;
    val s = unparse tok;
  in
    if s = "" then (k, "")
    else if size s < 40 andalso not (exists_string (fn c => c = "\n") s)
    then (k ^ " " ^ Markup.markups ms s, "")
    else (k, Markup.markups ms s)
  end;



(** associated values **)

(* inlined file content *)

fun file_source (file: file) =
  let
    val text = cat_lines (#lines file);
    val end_pos = fold Position.advance (Symbol.explode text) (#pos file);
  in Input.source true text (Position.range (#pos file, end_pos)) end;

fun get_files (Token (_, _, Value (SOME (Files files)))) = files
  | get_files _ = [];

fun put_files [] tok = tok
  | put_files files (Token (x, y, Slot)) = Token (x, y, Value (SOME (Files files)))
  | put_files _ tok = raise Fail ("Cannot put inlined files here" ^ Position.here (pos_of tok));


(* access values *)

fun get_value (Token (_, _, Value v)) = v
  | get_value _ = NONE;

fun map_value f (Token (x, y, Value (SOME v))) = Token (x, y, Value (SOME (f v)))
  | map_value _ tok = tok;


(* reports of value *)

fun get_assignable_value (Token (_, _, Assignable r)) = ! r
  | get_assignable_value (Token (_, _, Value v)) = v
  | get_assignable_value _ = NONE;

fun reports_of_value tok =
  (case get_assignable_value tok of
    SOME (Literal markup) =>
      let
        val pos = pos_of tok;
        val x = content_of tok;
      in
        if Position.is_reported pos then
          map (pair pos) (keyword_markup markup x :: Completion.suppress_abbrevs x)
        else []
      end
  | _ => []);


(* name value *)

fun name_value a = Name (a, Morphism.identity);

fun get_name tok =
  (case get_assignable_value tok of
    SOME (Name (a, _)) => SOME a
  | _ => NONE);


(* maxidx *)

fun declare_maxidx tok =
  (case get_value tok of
    SOME (Source src) => fold declare_maxidx src
  | SOME (Typ T) => Variable.declare_maxidx (Term.maxidx_of_typ T)
  | SOME (Term t) => Variable.declare_maxidx (Term.maxidx_of_term t)
  | SOME (Fact (_, ths)) => fold (Variable.declare_maxidx o Thm.maxidx_of) ths
  | SOME (Attribute _) => I  (* FIXME !? *)
  | SOME (Declaration decl) =>
      (fn ctxt =>
        let val ctxt' = Context.proof_map (Morphism.form decl) ctxt
        in Variable.declare_maxidx (Variable.maxidx_of ctxt') ctxt end)
  | _ => I);


(* fact values *)

fun map_facts f =
  map_value (fn v =>
    (case v of
      Source src => Source (map (map_facts f) src)
    | Fact (a, ths) => Fact (a, f a ths)
    | _ => v));

val trim_context_src = (map o map_facts) (K (map Thm.trim_context));


(* transform *)

fun transform phi =
  map_value (fn v =>
    (case v of
      Source src => Source (map (transform phi) src)
    | Literal _ => v
    | Name (a, psi) => Name (a, psi $> phi)
    | Typ T => Typ (Morphism.typ phi T)
    | Term t => Term (Morphism.term phi t)
    | Fact (a, ths) => Fact (a, Morphism.fact phi ths)
    | Attribute att => Attribute (Morphism.transform phi att)
    | Declaration decl => Declaration (Morphism.transform phi decl)
    | Files _ => v));


(* static binding *)

(*1st stage: initialize assignable slots*)
fun init_assignable tok =
  (case tok of
    Token (x, y, Slot) => Token (x, y, Assignable (Unsynchronized.ref NONE))
  | Token (_, _, Value _) => tok
  | Token (_, _, Assignable r) => (r := NONE; tok));

(*2nd stage: assign values as side-effect of scanning*)
fun assign v tok =
  (case tok of
    Token (x, y, Slot) => Token (x, y, Value v)
  | Token (_, _, Value _) => tok
  | Token (_, _, Assignable r) => (r := v; tok));

fun evaluate mk eval arg =
  let val x = eval arg in (assign (SOME (mk x)) arg; x) end;

(*3rd stage: static closure of final values*)
fun closure (Token (x, y, Assignable (Unsynchronized.ref v))) = Token (x, y, Value v)
  | closure tok = tok;
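(*Illustrative sketch of the three-stage protocol above (hypothetical term
  value t; the assignable stage mutates the token in place):

    val tok' = init_assignable tok;        (*1: fresh assignable slot*)
    val _ = assign (SOME (Term t)) tok';   (*2: side-effect while scanning*)
    val tok'' = closure tok';              (*3: persistent Value slot*)
*)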


(* pretty *)

fun pretty_value ctxt tok =
  (case get_value tok of
    SOME (Literal markup) =>
      let val x = content_of tok
      in Pretty.mark_str (keyword_markup markup x, x) end
  | SOME (Name ({print, ...}, _)) => Pretty.quote (Pretty.mark_str (print ctxt))
  | SOME (Typ T) => Syntax.pretty_typ ctxt T
  | SOME (Term t) => Syntax.pretty_term ctxt t
  | SOME (Fact (_, ths)) =>
      Pretty.enclose "(" ")" (Pretty.breaks (map (Pretty.cartouche o Thm.pretty_thm ctxt) ths))
  | _ => Pretty.marks_str (markups Keyword.empty_keywords tok, unparse tok));


(* src *)

fun dest_src ([]: src) = raise Fail "Empty token source"
  | dest_src (head :: args) = (head, args);

fun name_of_src src =
  let
    val head = #1 (dest_src src);
    val name =
      (case get_name head of
        SOME {name, ...} => name
      | NONE => content_of head);
  in (name, pos_of head) end;

val args_of_src = #2 o dest_src;

fun pretty_src ctxt src =
  let
    val (head, args) = dest_src src;
    val prt_name =
      (case get_name head of
        SOME {print, ...} => Pretty.mark_str (print ctxt)
      | NONE => Pretty.str (content_of head));
  in Pretty.block (Pretty.breaks (Pretty.quote prt_name :: map (pretty_value ctxt) args)) end;

fun checked_src (head :: _) = is_some (get_name head)
  | checked_src [] = true;

fun check_src ctxt get_table src =
  let
    val (head, args) = dest_src src;
    val table = get_table ctxt;
  in
    (case get_name head of
      SOME {name, ...} => (src, Name_Space.get table name)
    | NONE =>
        let
          val pos = pos_of head;
          val (name, x) = Name_Space.check (Context.Proof ctxt) table (content_of head, pos);
          val _ = Context_Position.report ctxt pos Markup.operator;
          val kind = Name_Space.kind_of (Name_Space.space_of_table table);
          fun print ctxt' =
            Name_Space.markup_extern ctxt' (Name_Space.space_of_table (get_table ctxt')) name;
          val value = name_value {name = name, kind = kind, print = print};
          val head' = closure (assign (SOME value) head);
        in (head' :: args, x) end)
  end;



(** scanners **)

open Basic_Symbol_Pos;

val err_prefix = "Outer lexical error: ";

fun !!! msg = Symbol_Pos.!!! (fn () => err_prefix ^ msg);


(* scan symbolic idents *)

val scan_symid =
  Scan.many1 (Symbol.is_symbolic_char o Symbol_Pos.symbol) ||
  Scan.one (Symbol.is_symbolic o Symbol_Pos.symbol) >> single;

fun is_symid str =
  (case try Symbol.explode str of
    SOME [s] => Symbol.is_symbolic s orelse Symbol.is_symbolic_char s
  | SOME ss => forall Symbol.is_symbolic_char ss
  | _ => false);

fun ident_or_symbolic "begin" = false
  | ident_or_symbolic ":" = true
  | ident_or_symbolic "::" = true
  | ident_or_symbolic s = Symbol_Pos.is_identifier s orelse is_symid s;
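(*Examples (illustrative):
    ident_or_symbolic "foo"   = true    (*plain identifier*)
    ident_or_symbolic "++"    = true    (*symbolic identifier*)
    ident_or_symbolic "::"    = true    (*special case above*)
    ident_or_symbolic "begin" = false   (*explicitly excluded*)
*)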


(* scan verbatim text *)

val scan_verb =
  $$$ "*" --| Scan.ahead (~$$ "}") ||
  Scan.one (fn (s, _) => s <> "*" andalso Symbol.not_eof s) >> single;

val scan_verbatim =
  Scan.ahead ($$ "{" -- $$ "*") |--
    !!! "unclosed verbatim text"
      ((Symbol_Pos.scan_pos --| $$ "{" --| $$ "*") --
        (Scan.repeats scan_verb -- ($$ "*" |-- $$ "}" |-- Symbol_Pos.scan_pos)));

val recover_verbatim =
  $$$ "{" @@@ $$$ "*" @@@ Scan.repeats scan_verb;


(* scan cartouche *)

val scan_cartouche =
  Symbol_Pos.scan_pos --
    ((Symbol_Pos.scan_cartouche err_prefix >> Symbol_Pos.cartouche_content) -- Symbol_Pos.scan_pos);


(* scan space *)

fun space_symbol (s, _) = Symbol.is_blank s andalso s <> "\n";

val scan_space =
  Scan.many1 space_symbol @@@ Scan.optional ($$$ "\n") [] ||
  Scan.many space_symbol @@@ $$$ "\n";


(* scan comment *)

val scan_comment =
  Symbol_Pos.scan_pos -- (Symbol_Pos.scan_comment_body err_prefix -- Symbol_Pos.scan_pos);



(** token sources **)

local

fun token_leq ((_, syms1), (_, syms2)) = length syms1 <= length syms2;

fun token k ss =
  Token ((Symbol_Pos.implode ss, Symbol_Pos.range ss), (k, Symbol_Pos.content ss), Slot);

fun token_range k (pos1, (ss, pos2)) =
  Token (Symbol_Pos.implode_range (pos1, pos2) ss, (k, Symbol_Pos.content ss), Slot);

fun scan_token keywords = !!! "bad input"
  (Symbol_Pos.scan_string_qq err_prefix >> token_range String ||
    Symbol_Pos.scan_string_bq err_prefix >> token_range Alt_String ||
    scan_verbatim >> token_range Verbatim ||
    scan_cartouche >> token_range Cartouche ||
    scan_comment >> token_range (Comment NONE) ||
    Comment.scan_outer >> (fn (k, ss) => token (Comment (SOME k)) ss) ||
    scan_space >> token Space ||
    (Scan.max token_leq
      (Scan.max token_leq
        (Scan.literal (Keyword.major_keywords keywords) >> pair Command)
        (Scan.literal (Keyword.minor_keywords keywords) >> pair Keyword))
      (Lexicon.scan_longid >> pair Long_Ident ||
        Lexicon.scan_id >> pair Ident ||
        Lexicon.scan_var >> pair Var ||
        Lexicon.scan_tid >> pair Type_Ident ||
        Lexicon.scan_tvar >> pair Type_Var ||
        Symbol_Pos.scan_float >> pair Float ||
        Symbol_Pos.scan_nat >> pair Nat ||
        scan_symid >> pair Sym_Ident) >> uncurry token));

fun recover msg =
  (Symbol_Pos.recover_string_qq ||
    Symbol_Pos.recover_string_bq ||
    recover_verbatim ||
    Symbol_Pos.recover_cartouche ||
    Symbol_Pos.recover_comment ||
    Scan.one (Symbol.not_eof o Symbol_Pos.symbol) >> single)
  >> (single o token (Error msg));

in

fun make_source keywords {strict} =
  let
    val scan_strict = Scan.bulk (scan_token keywords);
    val scan = if strict then scan_strict else Scan.recover scan_strict recover;
  in Source.source Symbol_Pos.stopper scan end;

fun read_cartouche syms =
  (case Scan.read Symbol_Pos.stopper (scan_cartouche >> token_range Cartouche) syms of
    SOME tok => tok
  | NONE => error ("Single cartouche expected" ^ Position.here (#1 (Symbol_Pos.range syms))));

end;


(* explode *)

fun tokenize keywords strict syms =
  Source.of_list syms |> make_source keywords strict |> Source.exhaust;

fun explode keywords pos text =
  Symbol_Pos.explode (text, pos) |> tokenize keywords {strict = false};

fun explode0 keywords = explode keywords Position.none;
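(*Illustrative sketch: exploding a plain string with no keywords declared,
  e.g.  explode0 Keyword.empty_keywords "foo bar 123"  yields five tokens of
  kinds Ident, Space, Ident, Space, Nat (in that order).*)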


(* print name in parsable form *)

fun print_name keywords name =
  ((case explode keywords Position.none name of
    [tok] => not (member (op =) [Ident, Long_Ident, Sym_Ident, Nat] (kind_of tok))
  | _ => true) ? Symbol_Pos.quote_string_qq) name;
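(*Examples (illustrative): a name that reads back as a single identifier-like
  token is left untouched, anything else is quoted, e.g.
  print_name Keyword.empty_keywords "foo" = "foo" but
  print_name Keyword.empty_keywords "two words" = "\"two words\"".*)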


(* make *)

fun make ((k, n), s) pos =
  let
    val pos' = Position.advance_offsets n pos;
    val range = Position.range (pos, pos');
    val tok =
      if 0 <= k andalso k < Vector.length immediate_kinds then
        Token ((s, range), (Vector.sub (immediate_kinds, k), s), Slot)
      else
        (case explode Keyword.empty_keywords pos s of
          [tok] => tok
        | _ => Token ((s, range), (Error (err_prefix ^ "exactly one token expected"), s), Slot))
  in (tok, pos') end;

fun make_string (s, pos) =
  let
    val Token ((x, _), y, z) = #1 (make ((~1, 0), Symbol_Pos.quote_string_qq s) Position.none);
    val pos' = Position.no_range_position pos;
  in Token ((x, (pos', pos')), y, z) end;

val make_int = explode Keyword.empty_keywords Position.none o signed_string_of_int;

fun make_src a args = make_string a :: args;



(** parsers **)

type 'a parser = T list -> 'a * T list;
type 'a context_parser = Context.generic * T list -> 'a * (Context.generic * T list);
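(*Illustrative sketch (hypothetical client code, outside this structure): a
  parser in this sense is an ordinary Scan-style function over token lists,
  e.g. the content of a single natural-number token:

    val nat_content: string Token.parser =
      Scan.one (Token.is_kind Token.Nat) >> Token.content_of;
*)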


(* read body -- e.g. antiquotation source *)

fun read_body keywords scan =
  tokenize (Keyword.no_command_keywords keywords) {strict = true}
  #> filter is_proper
  #> Scan.read stopper scan;

fun read_antiq keywords scan (syms, pos) =
  (case read_body keywords scan syms of
    SOME x => x
  | NONE => error ("Malformed antiquotation" ^ Position.here pos));
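(*Illustrative sketch: read_antiq is typically applied to antiquotation
  arguments together with a parser from Pure/Isar/parse.ML (assumed here),
  e.g.  read_antiq keywords Parse.name (syms, pos)  yields the parsed name or
  fails with the "Malformed antiquotation" error above.*)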


(* wrapped syntax *)

fun syntax_generic scan src context =
  let
    val (name, pos) = name_of_src src;
    val old_reports = maps reports_of_value src;
    val args1 = map init_assignable (args_of_src src);
    fun reported_text () =
      if Context_Position.is_visible_generic context then
        let val new_reports = maps (reports_of_value o closure) args1 in
          if old_reports <> new_reports then
            map (fn (p, m) => Position.reported_text p m "") new_reports
          else []
        end
      else [];
  in
    (case Scan.error (Scan.finite' stopper (Scan.option scan)) (context, args1) of
      (SOME x, (context', [])) =>
        let val _ = Output.report (reported_text ())
        in (x, context') end
    | (_, (context', args2)) =>
        let
          val print_name =
            (case get_name (hd src) of
              NONE => quote name
            | SOME {kind, print, ...} =>
                let
                  val ctxt' = Context.proof_of context';
                  val (markup, xname) = print ctxt';
                in plain_words kind ^ " " ^ quote (Markup.markup markup xname) end);
          val print_args =
            if null args2 then "" else ":\n  " ^ space_implode " " (map print args2);
        in
          error ("Bad arguments for " ^ print_name ^ Position.here pos ^ print_args ^
            Markup.markup_report (implode (reported_text ())))
        end)
  end;

fun syntax scan src = apsnd Context.the_proof o syntax_generic scan src o Context.Proof;

end;

type 'a parser = 'a Token.parser;
type 'a context_parser = 'a Token.context_parser;