1(* Copyright (c) 2009-2011 Tjark Weber. All rights reserved. *)
2
3(* Proof reconstruction for Z3: parsing of Z3's proofs *)
4
5structure Z3_ProofParser =
6struct
7
8  (* I tried to implement this parser in ML-Lex/ML-Yacc, but gave up
9     on that -- mainly for two reasons: 1. The whole toolchain/build
10     process gets more complicated. 2. Performance and memory usage of
11     the generated parser were far from satisfactory (probably because
12     my naive definition of the underlying grammar required the parser
13     to maintain a large stack internally). *)
14
15local
16
17  local open HolSmtTheory in end
18
19  open Z3_Proof
20
21  val ERR = Feedback.mk_HOL_ERR "Z3_ProofParser"
22  val WARNING = Feedback.HOL_WARNING "Z3_ProofParser"
23
24  (***************************************************************************)
25  (* auxiliary functions                                                     *)
26  (***************************************************************************)
27
28  fun proofterm_id (name : string) : int =
29    if String.isPrefix "@x" name then
30      let
31        val id = Option.valOf (Int.fromString (String.extract (name, 2, NONE)))
32          handle Option.Option =>
33            raise ERR "proofterm_id" "'@x' not followed by an integer"
34      in
35        if id < 1 then
36          raise ERR "proofterm_id" "integer less than 1 found"
37        else
38          id
39      end
40    else
41      raise ERR "proofterm_id" "'@x' prefix expected"
42
43  (***************************************************************************)
44  (* parsing Z3 proofterms as terms                                          *)
45  (***************************************************************************)
46
47  (* Z3 proofterms are essentially encoded in SMT-LIB term syntax, so
48     we re-use the SMT-LIB parser. *)
49
50  (* FIXME: However, there is a problem: the last argument to each
51     inference rule is the inferred conclusion (thus, a term), whereas
52     previous arguments are premises (thus, proofterms).
53     Unfortunately, the parser does not know whether it is parsing the
54     last argument until after it has parsed it.  We therefore parse
55     proofterms and terms uniformly as HOL terms, encoding the former
56     as terms of type :'pt.  Note that the current implementation
57     might parse certain terms (namely those containing functions
58     whose names coincide with Z3 inference rule names) erroneously as
59     proofterms.
60
61     I am hoping that the Z3 proof format will eventually be changed
62     so that this is no longer an issue in the parser.  It would
63     suffice to enclose each inference rule's list of premises in
64     parentheses; then the parser would find a ")" token once it has
65     parsed the last premise, and can continue by parsing a term. *)
66
67  val pt_ty = Type.mk_vartype "'pt"
68
69  fun zero_prems name =
70    SmtLib_Theories.K_zero_one (Lib.curry Term.mk_comb (Term.mk_var
71      (name, Type.--> (Type.bool, pt_ty))))
72
73  fun one_prem name =
74    SmtLib_Theories.K_zero_two (Lib.uncurry (Lib.curry Term.mk_comb o
75      Lib.curry Term.mk_comb (Term.mk_var (name, boolSyntax.list_mk_fun
76      ([pt_ty, Type.bool], pt_ty)))))
77
78  fun two_prems name =
79  let
80    val t = Term.mk_var (name,
81      boolSyntax.list_mk_fun ([pt_ty, pt_ty, Type.bool], pt_ty))
82  in
83    SmtLib_Theories.K_zero_three (fn (p1, p2, concl) =>
84      Term.list_mk_comb (t, [p1, p2, concl]))
85  end
86
87  fun list_prems name =
88    SmtLib_Theories.K_zero_list (Lib.uncurry (Lib.curry Term.mk_comb o
89      (Lib.curry Term.mk_comb (Term.mk_var (name, boolSyntax.list_mk_fun
90      ([listSyntax.mk_list_type pt_ty, Type.bool], pt_ty))))) o
91      Lib.apfst (Lib.C (Lib.curry listSyntax.mk_list) pt_ty) o Lib.front_last)
92
93  val z3_builtin_dict = Library.dict_from_list [
94    ("and-elim",        one_prem "and-elim"),
95    ("asserted",        zero_prems "asserted"),
96    ("commutativity",   zero_prems "commutativity"),
97    ("def-axiom",       zero_prems "def-axiom"),
98    ("elim-unused",     zero_prems "elim-unused"),
99    ("hypothesis",      zero_prems "hypothesis"),
100    ("iff-true",        one_prem "iff-true"),
101    ("lemma",           one_prem "lemma"),
102    ("monotonicity",    list_prems "monotonicity"),
103    ("mp",              two_prems "mp"),
104    ("not-or-elim",     one_prem "not-or-elim"),
105    ("quant-intro",     one_prem "quant-intro"),
106    ("rewrite",         zero_prems "rewrite"),
107    ("symm",            one_prem "symm"),
108    ("th-lemma-arith",  list_prems "th-lemma-arith"),
109    ("th-lemma-array",  list_prems "th-lemma-array"),
110    ("th-lemma-basic",  list_prems "th-lemma-basic"),
111    ("th-lemma-bv",     list_prems "th-lemma-bv"),
112    ("trans",           two_prems "trans"),
113    ("true-axiom",      zero_prems "true-axiom"),
114    ("unit-resolution", list_prems "unit-resolution"),
115
116    (* FIXME: I am hoping that the Z3 proof format will eventually be
117       changed to adhere to the SMT-LIB format more strictly, i.e.,
118       also for the following operations/constants, so that these
119       additional parsing functions are no longer necessary. *)
120    ("iff", SmtLib_Theories.K_zero_two boolSyntax.mk_eq),
121    ("implies", SmtLib_Theories.K_zero_two boolSyntax.mk_imp),
122    ("~", SmtLib_Theories.K_zero_one intSyntax.mk_negated),
123    ("~", SmtLib_Theories.K_zero_one realSyntax.mk_negated),
124    (* bit-vector constants: bvm[n] *)
125    ("_", SmtLib_Theories.zero_zero (fn token =>
126      if String.isPrefix "bv" token then
127        let
128          val args = String.extract (token, 2, NONE)
129          val (value, args) = Lib.pair_of_list (String.fields (Lib.equal #"[")
130            args)
131          val (size, args) = Lib.pair_of_list (String.fields (Lib.equal #"]")
132            args)
133          val _ = args = "" orelse
134            raise ERR "<z3_builtin_dict._>" "not a bit-vector constant"
135          val value = Library.parse_arbnum value
136          val size = Library.parse_arbnum size
137        in
138          wordsSyntax.mk_word (value, size)
139        end
140      else
141        raise ERR "<z3_builtin_dict._>" "not a bit-vector constant")),
142    (* extract[m:n] t *)
143    ("_", SmtLib_Theories.zero_one (fn token =>
144      if String.isPrefix "extract[" token then
145        let
146          val args = String.extract (token, 8, NONE)
147          val (m, args) = Lib.pair_of_list (String.fields (Lib.equal #":") args)
148          val (n, args) = Lib.pair_of_list (String.fields (Lib.equal #"]") args)
149          val _ = args = "" orelse
150            raise ERR "<z3_builtin_dict._>" "not extract[m:n]"
151          val m = Library.parse_arbnum m
152          val n = Library.parse_arbnum n
153          val index_type = fcpLib.index_type (Arbnum.plus1 (Arbnum.- (m, n)))
154          val m = numSyntax.mk_numeral m
155          val n = numSyntax.mk_numeral n
156        in
157          fn t => wordsSyntax.mk_word_extract (m, n, t, index_type)
158        end
159      else
160        raise ERR "<z3_builtin_dict._>" "not extract[m:n]")),
161    (* (_ extractm n) t *)
162    ("_", SmtLib_Theories.one_one (fn token => fn n =>
163      if String.isPrefix "extract" token then
164        let
165          val m = Library.parse_arbnum (String.extract (token, 7, NONE))
166          val index_type = fcpLib.index_type (Arbnum.plus1 (Arbnum.- (m, n)))
167          val m = numSyntax.mk_numeral m
168          val n = numSyntax.mk_numeral n
169        in
170          fn t => wordsSyntax.mk_word_extract (m, n, t, index_type)
171        end
172      else
173        raise ERR "<z3_builtin_dict._>" "not extract<m> n")),
174    ("bvudiv_i", SmtLib_Theories.K_zero_two wordsSyntax.mk_word_div),
175    ("bvurem_i", SmtLib_Theories.K_zero_two wordsSyntax.mk_word_mod),
176    (* bvudiv0 t *)
177    ("bvudiv0", SmtLib_Theories.K_zero_one (fn t =>
178      let
179        val zero = wordsSyntax.mk_n2w (numSyntax.zero_tm, wordsSyntax.dim_of t)
180      in
181        wordsSyntax.mk_word_div (t, zero)
182      end)),
183    (* bvurem0 t *)
184    ("bvurem0", SmtLib_Theories.K_zero_one (fn t =>
185      let
186        val zero = wordsSyntax.mk_n2w (numSyntax.zero_tm, wordsSyntax.dim_of t)
187      in
188        wordsSyntax.mk_word_mod (t, zero)
189      end)),
190    (* array_extArray[m:n] t1 t2 *)
191    ("_", SmtLib_Theories.zero_two (fn token =>
192      if String.isPrefix "array_ext" token then
193        (fn (t1, t2) => Term.mk_comb (boolSyntax.mk_icomb
194          (Term.prim_mk_const {Thy="HolSmt", Name="array_ext"}, t1), t2))
195      else
196        raise ERR "<z3_builtin_dict._>" "not array_ext...")),
197    (* repeatn t *)
198    ("_", SmtLib_Theories.zero_one (fn token =>
199      if String.isPrefix "repeat" token then
200        let
201          val n = Library.parse_arbnum (String.extract (token, 6, NONE))
202          val n = numSyntax.mk_numeral n
203        in
204          fn t => wordsSyntax.mk_word_replicate (n, t)
205        end
206      else
207        raise ERR "<z3_builtin_dict._>" "not repeat<n>")),
208    (* zero_extendn t *)
209    ("_", SmtLib_Theories.zero_one (fn token =>
210      if String.isPrefix "zero_extend" token then
211        let
212          val n = Library.parse_arbnum (String.extract (token, 11, NONE))
213        in
214          fn t => wordsSyntax.mk_w2w (t, fcpLib.index_type
215            (Arbnum.+ (fcpLib.index_to_num (wordsSyntax.dim_of t), n)))
216        end
217      else
218        raise ERR "<z3_builtin_dict._>" "not zero_extend<n>")),
219    (* sign_extendn t *)
220    ("_", SmtLib_Theories.zero_one (fn token =>
221      if String.isPrefix "sign_extend" token then
222        let
223          val n = Library.parse_arbnum (String.extract (token, 11, NONE))
224        in
225          fn t => wordsSyntax.mk_sw2sw (t, fcpLib.index_type
226            (Arbnum.+ (fcpLib.index_to_num (wordsSyntax.dim_of t), n)))
227        end
228      else
229        raise ERR "<z3_builtin_dict._>" "not sign_extend<n>")),
230    (* rotate_leftn t *)
231    ("_", SmtLib_Theories.zero_one (fn token =>
232      if String.isPrefix "rotate_left" token then
233        let
234          val n = Library.parse_arbnum (String.extract (token, 11, NONE))
235          val n = numSyntax.mk_numeral n
236        in
237          fn t => wordsSyntax.mk_word_rol (t, n)
238        end
239      else
240        raise ERR "<z3_builtin_dict._>" "not rotate_left<n>"))
241  ]
242
243  (***************************************************************************)
244  (* turning terms into Z3 proofterms                                        *)
245  (***************************************************************************)
246
247  (* we use a reference to implement recursion through this dictionary *)
248  val pt_dict = ref (Redblackmap.mkDict String.compare
249    : (string, Term.term list -> proofterm) Redblackmap.dict)
250
251  fun proofterm_of_term t =
252  let
253    val (hd, args) = boolSyntax.strip_comb t
254    val name = Lib.fst (Term.dest_var hd)
255  in
256    Redblackmap.find (!pt_dict, name) args
257      handle Redblackmap.NotFound =>
258        ID (proofterm_id (Lib.fst (Term.dest_var t)))
259      handle Feedback.HOL_ERR _ =>
260        raise ERR "proofterm_of_term" "term does not encode a Z3 proofterm"
261  end
262
263  val zero_prems_pt = SmtLib_Theories.one_arg
264
265  fun one_prem_pt f = SmtLib_Theories.two_args (f o Lib.apfst proofterm_of_term)
266
267  fun two_prems_pt f = SmtLib_Theories.three_args (fn (t1, t2, t3) =>
268    f (proofterm_of_term t1, proofterm_of_term t2, t3))
269
270  fun list_prems_pt f =
271    SmtLib_Theories.two_args (f o Lib.apfst
272      (List.map proofterm_of_term o Lib.fst o listSyntax.dest_list))
273
274  val _ = pt_dict := List.foldl
275    (fn ((key, value), dict) => Redblackmap.insert (dict, key, value))
276    (!pt_dict)
277    [
278      ("and-elim",        one_prem_pt AND_ELIM),
279      ("asserted",        zero_prems_pt ASSERTED),
280      ("commutativity",   zero_prems_pt COMMUTATIVITY),
281      ("def-axiom",       zero_prems_pt DEF_AXIOM),
282      ("elim-unused",     zero_prems_pt ELIM_UNUSED),
283      ("hypothesis",      zero_prems_pt HYPOTHESIS),
284      ("iff-true",        one_prem_pt IFF_TRUE),
285      ("lemma",           one_prem_pt LEMMA),
286      ("monotonicity",    list_prems_pt MONOTONICITY),
287      ("mp",              two_prems_pt MP),
288      ("not-or-elim",     one_prem_pt NOT_OR_ELIM),
289      ("quant-intro",     one_prem_pt QUANT_INTRO),
290      ("rewrite",         zero_prems_pt REWRITE),
291      ("symm",            one_prem_pt SYMM),
292      ("th-lemma-arith",  list_prems_pt TH_LEMMA_ARITH),
293      ("th-lemma-array",  list_prems_pt TH_LEMMA_ARRAY),
294      ("th-lemma-basic",  list_prems_pt TH_LEMMA_BASIC),
295      ("th-lemma-bv",     list_prems_pt TH_LEMMA_BV),
296      ("trans",           two_prems_pt TRANS),
297      ("true-axiom",      zero_prems_pt TRUE_AXIOM),
298      ("unit-resolution", list_prems_pt UNIT_RESOLUTION)
299    ]
300
301  (***************************************************************************)
302  (* parsing of let definitions                                              *)
303  (***************************************************************************)
304
305  (* returns an extended proof; 't' must encode a proofterm *)
306  fun extend_proof proof (id, t) =
307  let
308    val _ = if !Library.trace > 0 andalso
309      Option.isSome (Redblackmap.peek (proof, id)) then
310        WARNING "extend_proof"
311          ("proofterm ID " ^ Int.toString id ^ " defined more than once")
312      else ()
313  in
314    Redblackmap.insert (proof, id, proofterm_of_term t)
315  end
316
317  (* distinguishes between a term definition and a proofterm
318     definition; returns a (possibly extended) dictionary and proof *)
319  fun parse_definition get_token (tydict, tmdict, proof) =
320  let
321    val _ = Library.expect_token "(" (get_token ())
322    val _ = Library.expect_token "(" (get_token ())
323    val name = get_token ()
324    val t = SmtLib_Parser.parse_term get_token (tydict, tmdict)
325    val _ = Library.expect_token ")" (get_token ())
326    val _ = Library.expect_token ")" (get_token ())
327  in
328    if String.isPrefix "@x" name then
329      (* proofterm definition *)
330      let
331        val tmdict = Library.extend_dict ((name, SmtLib_Theories.K_zero_zero
332          (Term.mk_var (name, pt_ty))), tmdict)
333        val proof = extend_proof proof (proofterm_id name, t)
334      in
335        (tmdict, proof)
336      end
337    else
338      (* term definition *)
339      (Library.extend_dict ((name, SmtLib_Theories.K_zero_zero t), tmdict),
340        proof)
341  end
342
343  (* entry point into the parser (i.e., the grammar's start symbol) *)
344  fun parse_proof get_token (tydict, tmdict, proof) (rpars : int) =
345  let
346    val _ = Library.expect_token "(" (get_token ())
347    val head = get_token ()
348  in
349    if head = "let" then
350      let
351        val (tmdict, proof) = parse_definition get_token (tydict, tmdict, proof)
352      in
353        parse_proof get_token (tydict, tmdict, proof) (rpars + 1)
354      end
355    else if head = "error" then (
356      (* some (otherwise valid) proofs are preceded by an error message,
357         which we simply ignore *)
358      get_token ();
359      Library.expect_token ")" (get_token ());
360      parse_proof get_token (tydict, tmdict, proof) rpars
361    ) else
362      let
363        (* undo look-ahead of 2 tokens *)
364        val buffer = ref ["(", head]
365        fun get_token' () =
366          case !buffer of
367            [] => get_token ()
368          | x::xs => (buffer := xs; x)
369        val t = SmtLib_Parser.parse_term get_token' (tydict, tmdict)
370      in
371        (* Z3 assigns no ID to the final proof step; we use ID 0 *)
372        extend_proof proof (0, t) before Lib.funpow rpars
373          (fn () => Library.expect_token ")" (get_token ())) ()
374      end
375  end
376
377in
378
379  (* Similar to 'parse_file' below, but for instreams.  Does not close
380     the instream. *)
381
382  fun parse_stream (tydict : (string, Type.hol_type SmtLib_Parser.parse_fn list)
383    Redblackmap.dict, tmdict : (string, Term.term SmtLib_Parser.parse_fn list)
384    Redblackmap.dict) (instream : TextIO.instream) : proof =
385  let
386    (* union of user-declared names and Z3's inference rule names *)
387    val tmdict = Library.union_dict tmdict z3_builtin_dict
388    (* parse the stream *)
389    val _ = if !Library.trace > 1 then
390        Feedback.HOL_MESG "HolSmtLib: parsing Z3 proof"
391      else ()
392    val get_token = Library.get_token (Library.get_buffered_char instream)
393    val proof = parse_proof get_token
394      (tydict, tmdict, Redblackmap.mkDict Int.compare) 0
395    val _ = if !Library.trace > 0 then
396        WARNING "parse_stream" ("ignoring token '" ^ get_token () ^
397          "' (and perhaps others) after proof")
398          handle Feedback.HOL_ERR _ => ()  (* end of stream, as expected *)
399      else ()
400  in
401    proof
402  end
403
404  (* Function 'parse_file' parses Z3's response to the SMT2
405     (get-proof) command (for an unsatisfiable problem, with proofs
406     enabled in Z3, i.e., using option "PROOF_MODE=2").  It has been
407     tested with proofs generated by Z3 2.13 (which was released in
408     October 2010).  'parse_file' takes three arguments: two
409     dictionaries mapping names of types and terms (namely those
410     declared in the SMT-LIB benchmark) to lists of parsing functions
411     (cf. 'SmtLib_Parser.parse_file'); and the name of the proof
412     file. *)
413
414  fun parse_file (tydict, tmdict) (path : string) : proof =
415  let
416    val instream = TextIO.openIn path
417  in
418    parse_stream (tydict, tmdict) instream
419      before TextIO.closeIn instream
420  end
421
422end  (* local *)
423
424end
425