1(*  Title:      Pure/Tools/rail.ML
2    Author:     Michael Kerscher, TU M��nchen
3    Author:     Makarius
4
5Railroad diagrams in LaTeX.
6*)
7
8signature RAIL =
9sig
10  datatype rails =
11    Cat of int * rail list
12  and rail =
13    Bar of rails list |
14    Plus of rails * rails |
15    Newline of int |
16    Nonterminal of string |
17    Terminal of bool * string |
18    Antiquote of bool * Antiquote.antiq
19  val read: Proof.context -> Input.source -> (string Antiquote.antiquote * rail) list
20  val output_rules: Proof.context -> (string Antiquote.antiquote * rail) list -> Latex.text
21end;
22
23structure Rail: RAIL =
24struct
25
26(** lexical syntax **)
27
28(* singleton keywords *)
29
30val keywords =
31  Symtab.make [
32    ("|", Markup.keyword3),
33    ("*", Markup.keyword3),
34    ("+", Markup.keyword3),
35    ("?", Markup.keyword3),
36    ("(", Markup.empty),
37    (")", Markup.empty),
38    ("\<newline>", Markup.keyword2),
39    (";", Markup.keyword2),
40    (":", Markup.keyword2),
41    ("@", Markup.keyword1)];
42
43
44(* datatype token *)
45
46datatype kind =
47  Keyword | Ident | String | Space | Comment of Comment.kind | Antiq of Antiquote.antiq | EOF;
48
49datatype token = Token of Position.range * (kind * string);
50
51fun pos_of (Token ((pos, _), _)) = pos;
52fun end_pos_of (Token ((_, pos), _)) = pos;
53
54fun range_of (toks as tok :: _) =
55      let val pos' = end_pos_of (List.last toks)
56      in Position.range (pos_of tok, pos') end
57  | range_of [] = Position.no_range;
58
59fun kind_of (Token (_, (k, _))) = k;
60fun content_of (Token (_, (_, x))) = x;
61
62fun is_proper (Token (_, (Space, _))) = false
63  | is_proper (Token (_, (Comment _, _))) = false
64  | is_proper _ = true;
65
66
67(* diagnostics *)
68
69val print_kind =
70 fn Keyword => "rail keyword"
71  | Ident => "identifier"
72  | String => "single-quoted string"
73  | Space => "white space"
74  | Comment _ => "formal comment"
75  | Antiq _ => "antiquotation"
76  | EOF => "end-of-input";
77
78fun print (Token ((pos, _), (k, x))) =
79  (if k = EOF then print_kind k else print_kind k ^ " " ^ quote x) ^
80  Position.here pos;
81
82fun print_keyword x = print_kind Keyword ^ " " ^ quote x;
83
84fun reports_of_token (Token ((pos, _), (Keyword, x))) =
85      map (pair pos) (the_list (Symtab.lookup keywords x) @ Completion.suppress_abbrevs x)
86  | reports_of_token (Token ((pos, _), (String, _))) = [(pos, Markup.inner_string)]
87  | reports_of_token (Token (_, (Antiq antiq, _))) = Antiquote.antiq_reports [Antiquote.Antiq antiq]
88  | reports_of_token _ = [];
89
90
91(* stopper *)
92
93fun mk_eof pos = Token ((pos, Position.none), (EOF, ""));
94val eof = mk_eof Position.none;
95
96fun is_eof (Token (_, (EOF, _))) = true
97  | is_eof _ = false;
98
99val stopper =
100  Scan.stopper (fn [] => eof | toks => mk_eof (end_pos_of (List.last toks))) is_eof;
101
102
103(* tokenize *)
104
105local
106
107fun token k ss = [Token (Symbol_Pos.range ss, (k, Symbol_Pos.content ss))];
108
109fun antiq_token antiq =
110  [Token (#range antiq, (Antiq antiq, Symbol_Pos.content (#body antiq)))];
111
112val scan_space = Scan.many1 (Symbol.is_blank o Symbol_Pos.symbol);
113
114val scan_keyword =
115  Scan.one (Symtab.defined keywords o Symbol_Pos.symbol);
116
117val err_prefix = "Rail lexical error: ";
118
119val scan_token =
120  scan_space >> token Space ||
121  Comment.scan_inner >> (fn (kind, ss) => token (Comment kind) ss) ||
122  Antiquote.scan_antiq >> antiq_token ||
123  scan_keyword >> (token Keyword o single) ||
124  Lexicon.scan_id >> token Ident ||
125  Symbol_Pos.scan_string_q err_prefix >> (fn (pos1, (ss, pos2)) =>
126    [Token (Position.range (pos1, pos2), (String, Symbol_Pos.content ss))]);
127
128val scan =
129  Scan.repeats scan_token --|
130    Symbol_Pos.!!! (fn () => err_prefix ^ "bad input")
131      (Scan.ahead (Scan.one Symbol_Pos.is_eof));
132
133in
134
135val tokenize = #1 o Scan.error (Scan.finite Symbol_Pos.stopper scan);
136
137end;
138
139
140
141(** parsing **)
142
143(* parser combinators *)
144
145fun !!! scan =
146  let
147    val prefix = "Rail syntax error";
148
149    fun get_pos [] = " (end-of-input)"
150      | get_pos (tok :: _) = Position.here (pos_of tok);
151
152    fun err (toks, NONE) = (fn () => prefix ^ get_pos toks)
153      | err (toks, SOME msg) =
154          (fn () =>
155            let val s = msg () in
156              if String.isPrefix prefix s then s
157              else prefix ^ get_pos toks ^ ": " ^ s
158            end);
159  in Scan.!! err scan end;
160
161fun $$$ x =
162  Scan.one (fn tok => kind_of tok = Keyword andalso content_of tok = x) ||
163  Scan.fail_with
164    (fn [] => (fn () => print_keyword x ^ " expected,\nbut end-of-input was found")
165      | tok :: _ => (fn () => print_keyword x ^ " expected,\nbut " ^ print tok ^ " was found"));
166
167fun enum1 sep scan = scan ::: Scan.repeat ($$$ sep |-- !!! scan);
168fun enum sep scan = enum1 sep scan || Scan.succeed [];
169
170val ident = Scan.some (fn tok => if kind_of tok = Ident then SOME (content_of tok) else NONE);
171val string = Scan.some (fn tok => if kind_of tok = String then SOME (content_of tok) else NONE);
172
173val antiq = Scan.some (fn tok => (case kind_of tok of Antiq a => SOME a | _ => NONE));
174
175fun RANGE scan = Scan.trace scan >> apsnd range_of;
176fun RANGE_APP scan = RANGE scan >> (fn (f, r) => f r);
177
178
179(* parse trees *)
180
181datatype trees =
182  CAT of tree list * Position.range
183and tree =
184  BAR of trees list * Position.range |
185  STAR of (trees * trees) * Position.range |
186  PLUS of (trees * trees) * Position.range |
187  MAYBE of tree * Position.range |
188  NEWLINE of Position.range |
189  NONTERMINAL of string * Position.range |
190  TERMINAL of (bool * string) * Position.range |
191  ANTIQUOTE of (bool * Antiquote.antiq) * Position.range;
192
193fun reports_of_tree ctxt =
194  if Context_Position.is_visible ctxt then
195    let
196      fun reports r =
197        if r = Position.no_range then []
198        else [(Position.range_position r, Markup.expression "")];
199      fun trees (CAT (ts, r)) = reports r @ maps tree ts
200      and tree (BAR (Ts, r)) = reports r @ maps trees Ts
201        | tree (STAR ((T1, T2), r)) = reports r @ trees T1 @ trees T2
202        | tree (PLUS ((T1, T2), r)) = reports r @ trees T1 @ trees T2
203        | tree (MAYBE (t, r)) = reports r @ tree t
204        | tree (NEWLINE r) = reports r
205        | tree (NONTERMINAL (_, r)) = reports r
206        | tree (TERMINAL (_, r)) = reports r
207        | tree (ANTIQUOTE (_, r)) = reports r;
208    in distinct (op =) o tree end
209  else K [];
210
211local
212
213val at_mode = Scan.option ($$$ "@") >> (fn NONE => false | _ => true);
214
215fun body x = (RANGE (enum1 "|" body1) >> BAR) x
216and body0 x = (RANGE (enum "|" body1) >> BAR) x
217and body1 x =
218 (RANGE_APP (body2 :|-- (fn a =>
219   $$$ "*" |-- !!! body4e >> (fn b => fn r => CAT ([STAR ((a, b), r)], r)) ||
220   $$$ "+" |-- !!! body4e >> (fn b => fn r => CAT ([PLUS ((a, b), r)], r)) ||
221   Scan.succeed (K a)))) x
222and body2 x = (RANGE (Scan.repeat1 body3) >> CAT) x
223and body3 x =
224 (RANGE_APP (body4 :|-- (fn a =>
225   $$$ "?" >> K (curry MAYBE a) ||
226   Scan.succeed (K a)))) x
227and body4 x =
228 ($$$ "(" |-- !!! (body0 --| $$$ ")") ||
229  RANGE_APP
230   ($$$ "\<newline>" >> K NEWLINE ||
231    ident >> curry NONTERMINAL ||
232    at_mode -- string >> curry TERMINAL ||
233    at_mode -- antiq >> curry ANTIQUOTE)) x
234and body4e x =
235  (RANGE (Scan.option body4) >> (fn (a, r) => CAT (the_list a, r))) x;
236
237val rule_name = ident >> Antiquote.Text || antiq >> Antiquote.Antiq;
238val rule = rule_name -- ($$$ ":" |-- !!! body) || body >> pair (Antiquote.Text "");
239val rules = enum1 ";" (Scan.option rule) >> map_filter I;
240
241in
242
243fun parse_rules toks =
244  #1 (Scan.error (Scan.finite stopper (rules --| !!! (Scan.ahead (Scan.one is_eof)))) toks);
245
246end;
247
248
249(** rail expressions **)
250
251(* datatype *)
252
253datatype rails =
254  Cat of int * rail list
255and rail =
256  Bar of rails list |
257  Plus of rails * rails |
258  Newline of int |
259  Nonterminal of string |
260  Terminal of bool * string |
261  Antiquote of bool * Antiquote.antiq;
262
263fun is_newline (Newline _) = true | is_newline _ = false;
264
265
266(* prepare *)
267
268local
269
270fun cat rails = Cat (0, rails);
271
272val empty = cat [];
273fun is_empty (Cat (_, [])) = true | is_empty _ = false;
274
275fun bar [Cat (_, [rail])] = rail
276  | bar cats = Bar cats;
277
278fun reverse_cat (Cat (y, rails)) = Cat (y, rev (map reverse rails))
279and reverse (Bar cats) = Bar (map reverse_cat cats)
280  | reverse (Plus (cat1, cat2)) = Plus (reverse_cat cat1, reverse_cat cat2)
281  | reverse x = x;
282
283fun plus (cat1, cat2) = Plus (cat1, reverse_cat cat2);
284
285in
286
287fun prepare_trees (CAT (ts, _)) = Cat (0, map prepare_tree ts)
288and prepare_tree (BAR (Ts, _)) = bar (map prepare_trees Ts)
289  | prepare_tree (STAR (Ts, _)) =
290      let val (cat1, cat2) = apply2 prepare_trees Ts in
291        if is_empty cat2 then plus (empty, cat1)
292        else bar [empty, cat [plus (cat1, cat2)]]
293      end
294  | prepare_tree (PLUS (Ts, _)) = plus (apply2 prepare_trees Ts)
295  | prepare_tree (MAYBE (t, _)) = bar [empty, cat [prepare_tree t]]
296  | prepare_tree (NEWLINE _) = Newline 0
297  | prepare_tree (NONTERMINAL (a, _)) = Nonterminal a
298  | prepare_tree (TERMINAL (a, _)) = Terminal a
299  | prepare_tree (ANTIQUOTE (a, _)) = Antiquote a;
300
301end;
302
303
304(* read *)
305
306fun read ctxt source =
307  let
308    val _ = Context_Position.report ctxt (Input.pos_of source) Markup.language_rail;
309    val toks = tokenize (Input.source_explode source);
310    val _ = Context_Position.reports ctxt (maps reports_of_token toks);
311    val rules = parse_rules (filter is_proper toks);
312    val _ = Position.reports (maps (reports_of_tree ctxt o #2) rules);
313  in map (apsnd prepare_tree) rules end;
314
315
316(* latex output *)
317
318local
319
320fun vertical_range_cat (Cat (_, rails)) y =
321  let val (rails', (_, y')) =
322    fold_map (fn rail => fn (y0, y') =>
323      if is_newline rail then (Newline (y' + 1), (y' + 1, y' + 2))
324      else
325        let val (rail', y0') = vertical_range rail y0;
326        in (rail', (y0, Int.max (y0', y'))) end) rails (y, y + 1)
327  in (Cat (y, rails'), y') end
328
329and vertical_range (Bar cats) y =
330      let val (cats', y') = fold_map vertical_range_cat cats y
331      in (Bar cats', Int.max (y + 1, y')) end
332  | vertical_range (Plus (cat1, cat2)) y =
333      let val ([cat1', cat2'], y') = fold_map vertical_range_cat [cat1, cat2] y;
334      in (Plus (cat1', cat2'), Int.max (y + 1, y')) end
335  | vertical_range (Newline _) y = (Newline (y + 2), y + 3)
336  | vertical_range atom y = (atom, y + 1);
337
338in
339
340fun output_rules ctxt rules =
341  let
342    val output_antiq =
343      Antiquote.Antiq #>
344      Document_Antiquotation.evaluate (single o Latex.symbols) ctxt #>
345      Latex.output_text;
346    fun output_text b s =
347      Output.output s
348      |> b ? enclose "\\isakeyword{" "}"
349      |> enclose "\\isa{" "}";
350
351    fun output_cat c (Cat (_, rails)) = outputs c rails
352    and outputs c [rail] = output c rail
353      | outputs _ rails = implode (map (output "") rails)
354    and output _ (Bar []) = ""
355      | output c (Bar [cat]) = output_cat c cat
356      | output _ (Bar (cat :: cats)) =
357          "\\rail@bar\n" ^ output_cat "" cat ^
358          implode (map (fn Cat (y, rails) =>
359              "\\rail@nextbar{" ^ string_of_int y ^ "}\n" ^ outputs "" rails) cats) ^
360          "\\rail@endbar\n"
361      | output c (Plus (cat, Cat (y, rails))) =
362          "\\rail@plus\n" ^ output_cat c cat ^
363          "\\rail@nextplus{" ^ string_of_int y ^ "}\n" ^ outputs "c" rails ^
364          "\\rail@endplus\n"
365      | output _ (Newline y) = "\\rail@cr{" ^ string_of_int y ^ "}\n"
366      | output c (Nonterminal s) = "\\rail@" ^ c ^ "nont{" ^ output_text false s ^ "}[]\n"
367      | output c (Terminal (b, s)) = "\\rail@" ^ c ^ "term{" ^ output_text b s ^ "}[]\n"
368      | output c (Antiquote (b, a)) =
369          "\\rail@" ^ c ^ (if b then "term{" else "nont{") ^ output_antiq a ^ "}[]\n";
370
371    fun output_rule (name, rail) =
372      let
373        val (rail', y') = vertical_range rail 0;
374        val out_name =
375          (case name of
376            Antiquote.Text "" => ""
377          | Antiquote.Text s => output_text false s
378          | Antiquote.Antiq a => output_antiq a);
379      in
380        "\\rail@begin{" ^ string_of_int y' ^ "}{" ^ out_name ^ "}\n" ^
381        output "" rail' ^
382        "\\rail@end\n"
383      end;
384  in Latex.string (Latex.environment "railoutput" (implode (map output_rule rules))) end;
385
386val _ = Theory.setup
387  (Thy_Output.antiquotation_raw_embedded \<^binding>\<open>rail\<close> (Scan.lift Args.text_input)
388    (fn ctxt => output_rules ctxt o read ctxt));
389
390end;
391
392end;
393