1(*  Title:      HOL/Tools/SMT/smtlib.ML
2    Author:     Sascha Boehme, TU Muenchen
3
4Parsing and generating SMT-LIB 2.
5*)
6
7signature SMTLIB =
8sig
9  exception PARSE of int * string
10  datatype tree = 
11    Num of int |
12    Dec of int * int |
13    Str of string |
14    Sym of string |
15    Key of string |
16    S of tree list
17  val parse: string list -> tree
18  val pretty_tree: tree -> Pretty.T
19  val str_of: tree -> string
20end;
21
22structure SMTLIB: SMTLIB =
23struct
24
25(* data structures *)
26
27exception PARSE of int * string
28
29datatype tree = 
30  Num of int |
31  Dec of int * int |
32  Str of string |
33  Sym of string |
34  Key of string |
35  S of tree list
36
37datatype unfinished = None | String of string | Symbol of string
38
39
40(* utilities *)
41
42fun read_raw pred l cs =
43  (case chop_prefix pred cs of
44    ([], []) => raise PARSE (l, "empty token")
45  | ([], c :: _) => raise PARSE (l, "unexpected character " ^ quote c)
46  | x => x)
47
48
49(* numerals and decimals *)
50
51fun int_of cs = fst (read_int cs)
52
53fun read_num l cs =
54  (case read_raw Symbol.is_ascii_digit l cs of
55    (cs1, "." :: cs') =>
56      let val (cs2, cs'') = read_raw Symbol.is_ascii_digit l cs'
57      in (Dec (int_of cs1, int_of cs2), cs'') end
58  | (cs1, cs2) => (Num (int_of cs1), cs2))
59
60
61(* binary numbers *)
62
63fun is_bin c = (c = "0" orelse c = "1")
64
65fun read_bin l cs = read_raw is_bin l cs |>> Num o fst o read_radix_int 2
66
67
68(* hex numbers *)
69
70val is_hex = member (op =) (raw_explode "0123456789abcdefABCDEF")
71
72fun within c1 c2 c = (ord c1 <= ord c andalso ord c <= ord c2)
73
74fun unhex i [] = i
75  | unhex i (c :: cs) =
76      if within "0" "9" c then unhex (i * 16 + (ord c - ord "0")) cs
77      else if within "a" "f" c then unhex (i * 16 + (ord c - ord "a" + 10)) cs
78      else if within "A" "F" c then unhex (i * 16 + (ord c - ord "A" + 10)) cs
79      else raise Fail ("bad hex character " ^ quote c)
80
81fun read_hex l cs = read_raw is_hex l cs |>> Num o unhex 0
82
83
84(* symbols *)
85
86val symbol_chars = raw_explode "~!@$%^&*_+=<>.?/-" 
87
88fun is_sym c =
89  Symbol.is_ascii_letter c orelse
90  Symbol.is_ascii_digit c orelse
91  member (op =) symbol_chars c
92
93fun read_sym f l cs = read_raw is_sym l cs |>> f o implode
94
95
96(* quoted tokens *)
97
98fun read_quoted stop (escape, replacement) cs =
99  let
100    fun read _ [] = NONE
101      | read rs (cs as (c :: cs')) =
102          if is_prefix (op =) stop cs then
103            SOME (implode (rev rs), drop (length stop) cs)
104          else if not (null escape) andalso is_prefix (op =) escape cs then
105            read (replacement :: rs) (drop (length escape) cs)
106          else read (c :: rs) cs'
107  in read [] cs end
108
109fun read_string cs = read_quoted ["\\", "\""] (["\\", "\\"], "\\") cs
110fun read_symbol cs = read_quoted ["|"] ([], "") cs
111
112
113(* core parser *)
114
115fun read _ [] rest tss = (rest, tss)
116  | read l ("(" :: cs) None tss = read l cs None ([] :: tss)
117  | read l (")" :: cs) None (ts1 :: ts2 :: tss) =
118      read l cs None ((S (rev ts1) :: ts2) :: tss)
119  | read l ("#" :: "x" :: cs) None (ts :: tss) =
120      token read_hex l cs ts tss
121  | read l ("#" :: "b" :: cs) None (ts :: tss) =
122      token read_bin l cs ts tss
123  | read l (":" :: cs) None (ts :: tss) =
124      token (read_sym Key) l cs ts tss
125  | read l ("\"" :: cs) None (ts :: tss) =
126      quoted read_string String Str l "" cs ts tss
127  | read l ("|" :: cs) None (ts :: tss) =
128      quoted read_symbol Symbol Sym l "" cs ts tss
129  | read l ((c as "!") :: cs) None (ts :: tss) =
130      token (fn _ => pair (Sym c)) l cs ts tss
131  | read l (c :: cs) None (ts :: tss) =
132      if Symbol.is_ascii_blank c then read l cs None (ts :: tss)
133      else if Symbol.is_digit c then token read_num l (c :: cs) ts tss
134      else token (read_sym Sym) l (c :: cs) ts tss
135  | read l cs (String s) (ts :: tss) =
136      quoted read_string String Str l s cs ts tss
137  | read l cs (Symbol s) (ts :: tss) =
138      quoted read_symbol Symbol Sym l s cs ts tss
139  | read l _ _ [] = raise PARSE (l, "bad parser state")
140
141and token f l cs ts tss =
142  let val (t, cs') = f l cs
143  in read l cs' None ((t :: ts) :: tss) end
144
145and quoted r f g l s cs ts tss =
146  (case r cs of
147    NONE => (f (s ^ implode cs), ts :: tss)
148  | SOME (s', cs') => read l cs' None ((g (s ^ s') :: ts) :: tss))
149  
150
151
152(* overall parser *)
153
154fun read_line l line = read l (raw_explode line)
155
156fun add_line line (l, (None, tss)) =
157      if size line = 0 orelse nth_string line 0 = ";" then (l + 1, (None, tss))
158      else (l + 1, read_line l line None tss)
159  | add_line line (l, (unfinished, tss)) =
160      (l + 1, read_line l line unfinished tss)
161
162fun finish (_, (None, [[t]])) = t
163  | finish (l, _) = raise PARSE (l, "bad nesting")
164
165fun parse lines = finish (fold add_line lines (1, (None, [[]])))
166
167
168(* pretty printer *)
169
170fun pretty_tree (Num i) = Pretty.str (string_of_int i)
171  | pretty_tree (Dec (i, j)) =
172      Pretty.str (string_of_int i ^ "." ^ string_of_int j)
173  | pretty_tree (Str s) =
174      raw_explode s
175      |> maps (fn "\"" => ["\\", "\""] | "\\" => ["\\", "\\"] | c => [c])
176      |> implode
177      |> enclose "\"" "\""
178      |> Pretty.str
179  | pretty_tree (Sym s) =
180      if String.isPrefix "(" s (* for bit vector functions *) orelse
181         forall is_sym (raw_explode s) then
182        Pretty.str s
183      else
184        Pretty.str ("|" ^ s ^ "|")
185  | pretty_tree (Key s) = Pretty.str (":" ^ s)
186  | pretty_tree (S trees) =
187      Pretty.enclose "(" ")" (Pretty.separate "" (map pretty_tree trees))
188
189val str_of = Pretty.unformatted_string_of o pretty_tree
190
191end;
192