1(* Title: Pure/Tools/rail.ML 2 Author: Michael Kerscher, TU M��nchen 3 Author: Makarius 4 5Railroad diagrams in LaTeX. 6*) 7 8signature RAIL = 9sig 10 datatype rails = 11 Cat of int * rail list 12 and rail = 13 Bar of rails list | 14 Plus of rails * rails | 15 Newline of int | 16 Nonterminal of string | 17 Terminal of bool * string | 18 Antiquote of bool * Antiquote.antiq 19 val read: Proof.context -> Input.source -> (string Antiquote.antiquote * rail) list 20 val output_rules: Proof.context -> (string Antiquote.antiquote * rail) list -> Latex.text 21end; 22 23structure Rail: RAIL = 24struct 25 26(** lexical syntax **) 27 28(* singleton keywords *) 29 30val keywords = 31 Symtab.make [ 32 ("|", Markup.keyword3), 33 ("*", Markup.keyword3), 34 ("+", Markup.keyword3), 35 ("?", Markup.keyword3), 36 ("(", Markup.empty), 37 (")", Markup.empty), 38 ("\<newline>", Markup.keyword2), 39 (";", Markup.keyword2), 40 (":", Markup.keyword2), 41 ("@", Markup.keyword1)]; 42 43 44(* datatype token *) 45 46datatype kind = 47 Keyword | Ident | String | Space | Comment of Comment.kind | Antiq of Antiquote.antiq | EOF; 48 49datatype token = Token of Position.range * (kind * string); 50 51fun pos_of (Token ((pos, _), _)) = pos; 52fun end_pos_of (Token ((_, pos), _)) = pos; 53 54fun range_of (toks as tok :: _) = 55 let val pos' = end_pos_of (List.last toks) 56 in Position.range (pos_of tok, pos') end 57 | range_of [] = Position.no_range; 58 59fun kind_of (Token (_, (k, _))) = k; 60fun content_of (Token (_, (_, x))) = x; 61 62fun is_proper (Token (_, (Space, _))) = false 63 | is_proper (Token (_, (Comment _, _))) = false 64 | is_proper _ = true; 65 66 67(* diagnostics *) 68 69val print_kind = 70 fn Keyword => "rail keyword" 71 | Ident => "identifier" 72 | String => "single-quoted string" 73 | Space => "white space" 74 | Comment _ => "formal comment" 75 | Antiq _ => "antiquotation" 76 | EOF => "end-of-input"; 77 78fun print (Token ((pos, _), (k, x))) = 79 (if k = EOF then print_kind k else print_kind k ^ " " ^ quote x) ^ 80 Position.here pos; 81 82fun print_keyword x = print_kind Keyword ^ " " ^ quote x; 83 84fun reports_of_token (Token ((pos, _), (Keyword, x))) = 85 map (pair pos) (the_list (Symtab.lookup keywords x) @ Completion.suppress_abbrevs x) 86 | reports_of_token (Token ((pos, _), (String, _))) = [(pos, Markup.inner_string)] 87 | reports_of_token (Token (_, (Antiq antiq, _))) = Antiquote.antiq_reports [Antiquote.Antiq antiq] 88 | reports_of_token _ = []; 89 90 91(* stopper *) 92 93fun mk_eof pos = Token ((pos, Position.none), (EOF, "")); 94val eof = mk_eof Position.none; 95 96fun is_eof (Token (_, (EOF, _))) = true 97 | is_eof _ = false; 98 99val stopper = 100 Scan.stopper (fn [] => eof | toks => mk_eof (end_pos_of (List.last toks))) is_eof; 101 102 103(* tokenize *) 104 105local 106 107fun token k ss = [Token (Symbol_Pos.range ss, (k, Symbol_Pos.content ss))]; 108 109fun antiq_token antiq = 110 [Token (#range antiq, (Antiq antiq, Symbol_Pos.content (#body antiq)))]; 111 112val scan_space = Scan.many1 (Symbol.is_blank o Symbol_Pos.symbol); 113 114val scan_keyword = 115 Scan.one (Symtab.defined keywords o Symbol_Pos.symbol); 116 117val err_prefix = "Rail lexical error: "; 118 119val scan_token = 120 scan_space >> token Space || 121 Comment.scan_inner >> (fn (kind, ss) => token (Comment kind) ss) || 122 Antiquote.scan_antiq >> antiq_token || 123 scan_keyword >> (token Keyword o single) || 124 Lexicon.scan_id >> token Ident || 125 Symbol_Pos.scan_string_q err_prefix >> (fn (pos1, (ss, pos2)) => 126 [Token (Position.range (pos1, pos2), (String, Symbol_Pos.content ss))]); 127 128val scan = 129 Scan.repeats scan_token --| 130 Symbol_Pos.!!! (fn () => err_prefix ^ "bad input") 131 (Scan.ahead (Scan.one Symbol_Pos.is_eof)); 132 133in 134 135val tokenize = #1 o Scan.error (Scan.finite Symbol_Pos.stopper scan); 136 137end; 138 139 140 141(** parsing **) 142 143(* parser combinators *) 144 145fun !!! scan = 146 let 147 val prefix = "Rail syntax error"; 148 149 fun get_pos [] = " (end-of-input)" 150 | get_pos (tok :: _) = Position.here (pos_of tok); 151 152 fun err (toks, NONE) = (fn () => prefix ^ get_pos toks) 153 | err (toks, SOME msg) = 154 (fn () => 155 let val s = msg () in 156 if String.isPrefix prefix s then s 157 else prefix ^ get_pos toks ^ ": " ^ s 158 end); 159 in Scan.!! err scan end; 160 161fun $$$ x = 162 Scan.one (fn tok => kind_of tok = Keyword andalso content_of tok = x) || 163 Scan.fail_with 164 (fn [] => (fn () => print_keyword x ^ " expected,\nbut end-of-input was found") 165 | tok :: _ => (fn () => print_keyword x ^ " expected,\nbut " ^ print tok ^ " was found")); 166 167fun enum1 sep scan = scan ::: Scan.repeat ($$$ sep |-- !!! scan); 168fun enum sep scan = enum1 sep scan || Scan.succeed []; 169 170val ident = Scan.some (fn tok => if kind_of tok = Ident then SOME (content_of tok) else NONE); 171val string = Scan.some (fn tok => if kind_of tok = String then SOME (content_of tok) else NONE); 172 173val antiq = Scan.some (fn tok => (case kind_of tok of Antiq a => SOME a | _ => NONE)); 174 175fun RANGE scan = Scan.trace scan >> apsnd range_of; 176fun RANGE_APP scan = RANGE scan >> (fn (f, r) => f r); 177 178 179(* parse trees *) 180 181datatype trees = 182 CAT of tree list * Position.range 183and tree = 184 BAR of trees list * Position.range | 185 STAR of (trees * trees) * Position.range | 186 PLUS of (trees * trees) * Position.range | 187 MAYBE of tree * Position.range | 188 NEWLINE of Position.range | 189 NONTERMINAL of string * Position.range | 190 TERMINAL of (bool * string) * Position.range | 191 ANTIQUOTE of (bool * Antiquote.antiq) * Position.range; 192 193fun reports_of_tree ctxt = 194 if Context_Position.is_visible ctxt then 195 let 196 fun reports r = 197 if r = Position.no_range then [] 198 else [(Position.range_position r, Markup.expression "")]; 199 fun trees (CAT (ts, r)) = reports r @ maps tree ts 200 and tree (BAR (Ts, r)) = reports r @ maps trees Ts 201 | tree (STAR ((T1, T2), r)) = reports r @ trees T1 @ trees T2 202 | tree (PLUS ((T1, T2), r)) = reports r @ trees T1 @ trees T2 203 | tree (MAYBE (t, r)) = reports r @ tree t 204 | tree (NEWLINE r) = reports r 205 | tree (NONTERMINAL (_, r)) = reports r 206 | tree (TERMINAL (_, r)) = reports r 207 | tree (ANTIQUOTE (_, r)) = reports r; 208 in distinct (op =) o tree end 209 else K []; 210 211local 212 213val at_mode = Scan.option ($$$ "@") >> (fn NONE => false | _ => true); 214 215fun body x = (RANGE (enum1 "|" body1) >> BAR) x 216and body0 x = (RANGE (enum "|" body1) >> BAR) x 217and body1 x = 218 (RANGE_APP (body2 :|-- (fn a => 219 $$$ "*" |-- !!! body4e >> (fn b => fn r => CAT ([STAR ((a, b), r)], r)) || 220 $$$ "+" |-- !!! body4e >> (fn b => fn r => CAT ([PLUS ((a, b), r)], r)) || 221 Scan.succeed (K a)))) x 222and body2 x = (RANGE (Scan.repeat1 body3) >> CAT) x 223and body3 x = 224 (RANGE_APP (body4 :|-- (fn a => 225 $$$ "?" >> K (curry MAYBE a) || 226 Scan.succeed (K a)))) x 227and body4 x = 228 ($$$ "(" |-- !!! (body0 --| $$$ ")") || 229 RANGE_APP 230 ($$$ "\<newline>" >> K NEWLINE || 231 ident >> curry NONTERMINAL || 232 at_mode -- string >> curry TERMINAL || 233 at_mode -- antiq >> curry ANTIQUOTE)) x 234and body4e x = 235 (RANGE (Scan.option body4) >> (fn (a, r) => CAT (the_list a, r))) x; 236 237val rule_name = ident >> Antiquote.Text || antiq >> Antiquote.Antiq; 238val rule = rule_name -- ($$$ ":" |-- !!! body) || body >> pair (Antiquote.Text ""); 239val rules = enum1 ";" (Scan.option rule) >> map_filter I; 240 241in 242 243fun parse_rules toks = 244 #1 (Scan.error (Scan.finite stopper (rules --| !!! (Scan.ahead (Scan.one is_eof)))) toks); 245 246end; 247 248 249(** rail expressions **) 250 251(* datatype *) 252 253datatype rails = 254 Cat of int * rail list 255and rail = 256 Bar of rails list | 257 Plus of rails * rails | 258 Newline of int | 259 Nonterminal of string | 260 Terminal of bool * string | 261 Antiquote of bool * Antiquote.antiq; 262 263fun is_newline (Newline _) = true | is_newline _ = false; 264 265 266(* prepare *) 267 268local 269 270fun cat rails = Cat (0, rails); 271 272val empty = cat []; 273fun is_empty (Cat (_, [])) = true | is_empty _ = false; 274 275fun bar [Cat (_, [rail])] = rail 276 | bar cats = Bar cats; 277 278fun reverse_cat (Cat (y, rails)) = Cat (y, rev (map reverse rails)) 279and reverse (Bar cats) = Bar (map reverse_cat cats) 280 | reverse (Plus (cat1, cat2)) = Plus (reverse_cat cat1, reverse_cat cat2) 281 | reverse x = x; 282 283fun plus (cat1, cat2) = Plus (cat1, reverse_cat cat2); 284 285in 286 287fun prepare_trees (CAT (ts, _)) = Cat (0, map prepare_tree ts) 288and prepare_tree (BAR (Ts, _)) = bar (map prepare_trees Ts) 289 | prepare_tree (STAR (Ts, _)) = 290 let val (cat1, cat2) = apply2 prepare_trees Ts in 291 if is_empty cat2 then plus (empty, cat1) 292 else bar [empty, cat [plus (cat1, cat2)]] 293 end 294 | prepare_tree (PLUS (Ts, _)) = plus (apply2 prepare_trees Ts) 295 | prepare_tree (MAYBE (t, _)) = bar [empty, cat [prepare_tree t]] 296 | prepare_tree (NEWLINE _) = Newline 0 297 | prepare_tree (NONTERMINAL (a, _)) = Nonterminal a 298 | prepare_tree (TERMINAL (a, _)) = Terminal a 299 | prepare_tree (ANTIQUOTE (a, _)) = Antiquote a; 300 301end; 302 303 304(* read *) 305 306fun read ctxt source = 307 let 308 val _ = Context_Position.report ctxt (Input.pos_of source) Markup.language_rail; 309 val toks = tokenize (Input.source_explode source); 310 val _ = Context_Position.reports ctxt (maps reports_of_token toks); 311 val rules = parse_rules (filter is_proper toks); 312 val _ = Position.reports (maps (reports_of_tree ctxt o #2) rules); 313 in map (apsnd prepare_tree) rules end; 314 315 316(* latex output *) 317 318local 319 320fun vertical_range_cat (Cat (_, rails)) y = 321 let val (rails', (_, y')) = 322 fold_map (fn rail => fn (y0, y') => 323 if is_newline rail then (Newline (y' + 1), (y' + 1, y' + 2)) 324 else 325 let val (rail', y0') = vertical_range rail y0; 326 in (rail', (y0, Int.max (y0', y'))) end) rails (y, y + 1) 327 in (Cat (y, rails'), y') end 328 329and vertical_range (Bar cats) y = 330 let val (cats', y') = fold_map vertical_range_cat cats y 331 in (Bar cats', Int.max (y + 1, y')) end 332 | vertical_range (Plus (cat1, cat2)) y = 333 let val ([cat1', cat2'], y') = fold_map vertical_range_cat [cat1, cat2] y; 334 in (Plus (cat1', cat2'), Int.max (y + 1, y')) end 335 | vertical_range (Newline _) y = (Newline (y + 2), y + 3) 336 | vertical_range atom y = (atom, y + 1); 337 338in 339 340fun output_rules ctxt rules = 341 let 342 val output_antiq = 343 Antiquote.Antiq #> 344 Document_Antiquotation.evaluate (single o Latex.symbols) ctxt #> 345 Latex.output_text; 346 fun output_text b s = 347 Output.output s 348 |> b ? enclose "\\isakeyword{" "}" 349 |> enclose "\\isa{" "}"; 350 351 fun output_cat c (Cat (_, rails)) = outputs c rails 352 and outputs c [rail] = output c rail 353 | outputs _ rails = implode (map (output "") rails) 354 and output _ (Bar []) = "" 355 | output c (Bar [cat]) = output_cat c cat 356 | output _ (Bar (cat :: cats)) = 357 "\\rail@bar\n" ^ output_cat "" cat ^ 358 implode (map (fn Cat (y, rails) => 359 "\\rail@nextbar{" ^ string_of_int y ^ "}\n" ^ outputs "" rails) cats) ^ 360 "\\rail@endbar\n" 361 | output c (Plus (cat, Cat (y, rails))) = 362 "\\rail@plus\n" ^ output_cat c cat ^ 363 "\\rail@nextplus{" ^ string_of_int y ^ "}\n" ^ outputs "c" rails ^ 364 "\\rail@endplus\n" 365 | output _ (Newline y) = "\\rail@cr{" ^ string_of_int y ^ "}\n" 366 | output c (Nonterminal s) = "\\rail@" ^ c ^ "nont{" ^ output_text false s ^ "}[]\n" 367 | output c (Terminal (b, s)) = "\\rail@" ^ c ^ "term{" ^ output_text b s ^ "}[]\n" 368 | output c (Antiquote (b, a)) = 369 "\\rail@" ^ c ^ (if b then "term{" else "nont{") ^ output_antiq a ^ "}[]\n"; 370 371 fun output_rule (name, rail) = 372 let 373 val (rail', y') = vertical_range rail 0; 374 val out_name = 375 (case name of 376 Antiquote.Text "" => "" 377 | Antiquote.Text s => output_text false s 378 | Antiquote.Antiq a => output_antiq a); 379 in 380 "\\rail@begin{" ^ string_of_int y' ^ "}{" ^ out_name ^ "}\n" ^ 381 output "" rail' ^ 382 "\\rail@end\n" 383 end; 384 in Latex.string (Latex.environment "railoutput" (implode (map output_rule rules))) end; 385 386val _ = Theory.setup 387 (Thy_Output.antiquotation_raw_embedded \<^binding>\<open>rail\<close> (Scan.lift Args.text_input) 388 (fn ctxt => output_rules ctxt o read ctxt)); 389 390end; 391 392end; 393