1(* Copyright (c) 2009-2011 Tjark Weber. All rights reserved. *) 2 3(* Proof reconstruction for Z3: parsing of Z3's proofs *) 4 5structure Z3_ProofParser = 6struct 7 8 (* I tried to implement this parser in ML-Lex/ML-Yacc, but gave up 9 on that -- mainly for two reasons: 1. The whole toolchain/build 10 process gets more complicated. 2. Performance and memory usage of 11 the generated parser were far from satisfactory (probably because 12 my naive definition of the underlying grammar required the parser 13 to maintain a large stack internally). *) 14 15local 16 17 local open HolSmtTheory in end 18 19 open Z3_Proof 20 21 val ERR = Feedback.mk_HOL_ERR "Z3_ProofParser" 22 val WARNING = Feedback.HOL_WARNING "Z3_ProofParser" 23 24 (***************************************************************************) 25 (* auxiliary functions *) 26 (***************************************************************************) 27 28 fun proofterm_id (name : string) : int = 29 if String.isPrefix "@x" name then 30 let 31 val id = Option.valOf (Int.fromString (String.extract (name, 2, NONE))) 32 handle Option.Option => 33 raise ERR "proofterm_id" "'@x' not followed by an integer" 34 in 35 if id < 1 then 36 raise ERR "proofterm_id" "integer less than 1 found" 37 else 38 id 39 end 40 else 41 raise ERR "proofterm_id" "'@x' prefix expected" 42 43 (***************************************************************************) 44 (* parsing Z3 proofterms as terms *) 45 (***************************************************************************) 46 47 (* Z3 proofterms are essentially encoded in SMT-LIB term syntax, so 48 we re-use the SMT-LIB parser. *) 49 50 (* FIXME: However, there is a problem: the last argument to each 51 inference rule is the inferred conclusion (thus, a term), whereas 52 previous arguments are premises (thus, proofterms). 53 Unfortunately, the parser does not know whether it is parsing the 54 last argument until after it has parsed it. We therefore parse 55 proofterms and terms uniformly as HOL terms, encoding the former 56 as terms of type :'pt. Note that the current implementation 57 might parse certain terms (namely those containing functions 58 whose names coincide with Z3 inference rule names) erroneously as 59 proofterms. 60 61 I am hoping that the Z3 proof format will eventually be changed 62 so that this is no longer an issue in the parser. It would 63 suffice to enclose each inference rule's list of premises in 64 parentheses; then the parser would find a ")" token once it has 65 parsed the last premise, and can continue by parsing a term. *) 66 67 val pt_ty = Type.mk_vartype "'pt" 68 69 fun zero_prems name = 70 SmtLib_Theories.K_zero_one (Lib.curry Term.mk_comb (Term.mk_var 71 (name, Type.--> (Type.bool, pt_ty)))) 72 73 fun one_prem name = 74 SmtLib_Theories.K_zero_two (Lib.uncurry (Lib.curry Term.mk_comb o 75 Lib.curry Term.mk_comb (Term.mk_var (name, boolSyntax.list_mk_fun 76 ([pt_ty, Type.bool], pt_ty))))) 77 78 fun two_prems name = 79 let 80 val t = Term.mk_var (name, 81 boolSyntax.list_mk_fun ([pt_ty, pt_ty, Type.bool], pt_ty)) 82 in 83 SmtLib_Theories.K_zero_three (fn (p1, p2, concl) => 84 Term.list_mk_comb (t, [p1, p2, concl])) 85 end 86 87 fun list_prems name = 88 SmtLib_Theories.K_zero_list (Lib.uncurry (Lib.curry Term.mk_comb o 89 (Lib.curry Term.mk_comb (Term.mk_var (name, boolSyntax.list_mk_fun 90 ([listSyntax.mk_list_type pt_ty, Type.bool], pt_ty))))) o 91 Lib.apfst (Lib.C (Lib.curry listSyntax.mk_list) pt_ty) o Lib.front_last) 92 93 val z3_builtin_dict = Library.dict_from_list [ 94 ("and-elim", one_prem "and-elim"), 95 ("asserted", zero_prems "asserted"), 96 ("commutativity", zero_prems "commutativity"), 97 ("def-axiom", zero_prems "def-axiom"), 98 ("elim-unused", zero_prems "elim-unused"), 99 ("hypothesis", zero_prems "hypothesis"), 100 ("iff-true", one_prem "iff-true"), 101 ("lemma", one_prem "lemma"), 102 ("monotonicity", list_prems "monotonicity"), 103 ("mp", two_prems "mp"), 104 ("not-or-elim", one_prem "not-or-elim"), 105 ("quant-intro", one_prem "quant-intro"), 106 ("rewrite", zero_prems "rewrite"), 107 ("symm", one_prem "symm"), 108 ("th-lemma-arith", list_prems "th-lemma-arith"), 109 ("th-lemma-array", list_prems "th-lemma-array"), 110 ("th-lemma-basic", list_prems "th-lemma-basic"), 111 ("th-lemma-bv", list_prems "th-lemma-bv"), 112 ("trans", two_prems "trans"), 113 ("true-axiom", zero_prems "true-axiom"), 114 ("unit-resolution", list_prems "unit-resolution"), 115 116 (* FIXME: I am hoping that the Z3 proof format will eventually be 117 changed to adhere to the SMT-LIB format more strictly, i.e., 118 also for the following operations/constants, so that these 119 additional parsing functions are no longer necessary. *) 120 ("iff", SmtLib_Theories.K_zero_two boolSyntax.mk_eq), 121 ("implies", SmtLib_Theories.K_zero_two boolSyntax.mk_imp), 122 ("~", SmtLib_Theories.K_zero_one intSyntax.mk_negated), 123 ("~", SmtLib_Theories.K_zero_one realSyntax.mk_negated), 124 (* bit-vector constants: bvm[n] *) 125 ("_", SmtLib_Theories.zero_zero (fn token => 126 if String.isPrefix "bv" token then 127 let 128 val args = String.extract (token, 2, NONE) 129 val (value, args) = Lib.pair_of_list (String.fields (Lib.equal #"[") 130 args) 131 val (size, args) = Lib.pair_of_list (String.fields (Lib.equal #"]") 132 args) 133 val _ = args = "" orelse 134 raise ERR "<z3_builtin_dict._>" "not a bit-vector constant" 135 val value = Library.parse_arbnum value 136 val size = Library.parse_arbnum size 137 in 138 wordsSyntax.mk_word (value, size) 139 end 140 else 141 raise ERR "<z3_builtin_dict._>" "not a bit-vector constant")), 142 (* extract[m:n] t *) 143 ("_", SmtLib_Theories.zero_one (fn token => 144 if String.isPrefix "extract[" token then 145 let 146 val args = String.extract (token, 8, NONE) 147 val (m, args) = Lib.pair_of_list (String.fields (Lib.equal #":") args) 148 val (n, args) = Lib.pair_of_list (String.fields (Lib.equal #"]") args) 149 val _ = args = "" orelse 150 raise ERR "<z3_builtin_dict._>" "not extract[m:n]" 151 val m = Library.parse_arbnum m 152 val n = Library.parse_arbnum n 153 val index_type = fcpLib.index_type (Arbnum.plus1 (Arbnum.- (m, n))) 154 val m = numSyntax.mk_numeral m 155 val n = numSyntax.mk_numeral n 156 in 157 fn t => wordsSyntax.mk_word_extract (m, n, t, index_type) 158 end 159 else 160 raise ERR "<z3_builtin_dict._>" "not extract[m:n]")), 161 (* (_ extractm n) t *) 162 ("_", SmtLib_Theories.one_one (fn token => fn n => 163 if String.isPrefix "extract" token then 164 let 165 val m = Library.parse_arbnum (String.extract (token, 7, NONE)) 166 val index_type = fcpLib.index_type (Arbnum.plus1 (Arbnum.- (m, n))) 167 val m = numSyntax.mk_numeral m 168 val n = numSyntax.mk_numeral n 169 in 170 fn t => wordsSyntax.mk_word_extract (m, n, t, index_type) 171 end 172 else 173 raise ERR "<z3_builtin_dict._>" "not extract<m> n")), 174 ("bvudiv_i", SmtLib_Theories.K_zero_two wordsSyntax.mk_word_div), 175 ("bvurem_i", SmtLib_Theories.K_zero_two wordsSyntax.mk_word_mod), 176 (* bvudiv0 t *) 177 ("bvudiv0", SmtLib_Theories.K_zero_one (fn t => 178 let 179 val zero = wordsSyntax.mk_n2w (numSyntax.zero_tm, wordsSyntax.dim_of t) 180 in 181 wordsSyntax.mk_word_div (t, zero) 182 end)), 183 (* bvurem0 t *) 184 ("bvurem0", SmtLib_Theories.K_zero_one (fn t => 185 let 186 val zero = wordsSyntax.mk_n2w (numSyntax.zero_tm, wordsSyntax.dim_of t) 187 in 188 wordsSyntax.mk_word_mod (t, zero) 189 end)), 190 (* array_extArray[m:n] t1 t2 *) 191 ("_", SmtLib_Theories.zero_two (fn token => 192 if String.isPrefix "array_ext" token then 193 (fn (t1, t2) => Term.mk_comb (boolSyntax.mk_icomb 194 (Term.prim_mk_const {Thy="HolSmt", Name="array_ext"}, t1), t2)) 195 else 196 raise ERR "<z3_builtin_dict._>" "not array_ext...")), 197 (* repeatn t *) 198 ("_", SmtLib_Theories.zero_one (fn token => 199 if String.isPrefix "repeat" token then 200 let 201 val n = Library.parse_arbnum (String.extract (token, 6, NONE)) 202 val n = numSyntax.mk_numeral n 203 in 204 fn t => wordsSyntax.mk_word_replicate (n, t) 205 end 206 else 207 raise ERR "<z3_builtin_dict._>" "not repeat<n>")), 208 (* zero_extendn t *) 209 ("_", SmtLib_Theories.zero_one (fn token => 210 if String.isPrefix "zero_extend" token then 211 let 212 val n = Library.parse_arbnum (String.extract (token, 11, NONE)) 213 in 214 fn t => wordsSyntax.mk_w2w (t, fcpLib.index_type 215 (Arbnum.+ (fcpLib.index_to_num (wordsSyntax.dim_of t), n))) 216 end 217 else 218 raise ERR "<z3_builtin_dict._>" "not zero_extend<n>")), 219 (* sign_extendn t *) 220 ("_", SmtLib_Theories.zero_one (fn token => 221 if String.isPrefix "sign_extend" token then 222 let 223 val n = Library.parse_arbnum (String.extract (token, 11, NONE)) 224 in 225 fn t => wordsSyntax.mk_sw2sw (t, fcpLib.index_type 226 (Arbnum.+ (fcpLib.index_to_num (wordsSyntax.dim_of t), n))) 227 end 228 else 229 raise ERR "<z3_builtin_dict._>" "not sign_extend<n>")), 230 (* rotate_leftn t *) 231 ("_", SmtLib_Theories.zero_one (fn token => 232 if String.isPrefix "rotate_left" token then 233 let 234 val n = Library.parse_arbnum (String.extract (token, 11, NONE)) 235 val n = numSyntax.mk_numeral n 236 in 237 fn t => wordsSyntax.mk_word_rol (t, n) 238 end 239 else 240 raise ERR "<z3_builtin_dict._>" "not rotate_left<n>")) 241 ] 242 243 (***************************************************************************) 244 (* turning terms into Z3 proofterms *) 245 (***************************************************************************) 246 247 (* we use a reference to implement recursion through this dictionary *) 248 val pt_dict = ref (Redblackmap.mkDict String.compare 249 : (string, Term.term list -> proofterm) Redblackmap.dict) 250 251 fun proofterm_of_term t = 252 let 253 val (hd, args) = boolSyntax.strip_comb t 254 val name = Lib.fst (Term.dest_var hd) 255 in 256 Redblackmap.find (!pt_dict, name) args 257 handle Redblackmap.NotFound => 258 ID (proofterm_id (Lib.fst (Term.dest_var t))) 259 handle Feedback.HOL_ERR _ => 260 raise ERR "proofterm_of_term" "term does not encode a Z3 proofterm" 261 end 262 263 val zero_prems_pt = SmtLib_Theories.one_arg 264 265 fun one_prem_pt f = SmtLib_Theories.two_args (f o Lib.apfst proofterm_of_term) 266 267 fun two_prems_pt f = SmtLib_Theories.three_args (fn (t1, t2, t3) => 268 f (proofterm_of_term t1, proofterm_of_term t2, t3)) 269 270 fun list_prems_pt f = 271 SmtLib_Theories.two_args (f o Lib.apfst 272 (List.map proofterm_of_term o Lib.fst o listSyntax.dest_list)) 273 274 val _ = pt_dict := List.foldl 275 (fn ((key, value), dict) => Redblackmap.insert (dict, key, value)) 276 (!pt_dict) 277 [ 278 ("and-elim", one_prem_pt AND_ELIM), 279 ("asserted", zero_prems_pt ASSERTED), 280 ("commutativity", zero_prems_pt COMMUTATIVITY), 281 ("def-axiom", zero_prems_pt DEF_AXIOM), 282 ("elim-unused", zero_prems_pt ELIM_UNUSED), 283 ("hypothesis", zero_prems_pt HYPOTHESIS), 284 ("iff-true", one_prem_pt IFF_TRUE), 285 ("lemma", one_prem_pt LEMMA), 286 ("monotonicity", list_prems_pt MONOTONICITY), 287 ("mp", two_prems_pt MP), 288 ("not-or-elim", one_prem_pt NOT_OR_ELIM), 289 ("quant-intro", one_prem_pt QUANT_INTRO), 290 ("rewrite", zero_prems_pt REWRITE), 291 ("symm", one_prem_pt SYMM), 292 ("th-lemma-arith", list_prems_pt TH_LEMMA_ARITH), 293 ("th-lemma-array", list_prems_pt TH_LEMMA_ARRAY), 294 ("th-lemma-basic", list_prems_pt TH_LEMMA_BASIC), 295 ("th-lemma-bv", list_prems_pt TH_LEMMA_BV), 296 ("trans", two_prems_pt TRANS), 297 ("true-axiom", zero_prems_pt TRUE_AXIOM), 298 ("unit-resolution", list_prems_pt UNIT_RESOLUTION) 299 ] 300 301 (***************************************************************************) 302 (* parsing of let definitions *) 303 (***************************************************************************) 304 305 (* returns an extended proof; 't' must encode a proofterm *) 306 fun extend_proof proof (id, t) = 307 let 308 val _ = if !Library.trace > 0 andalso 309 Option.isSome (Redblackmap.peek (proof, id)) then 310 WARNING "extend_proof" 311 ("proofterm ID " ^ Int.toString id ^ " defined more than once") 312 else () 313 in 314 Redblackmap.insert (proof, id, proofterm_of_term t) 315 end 316 317 (* distinguishes between a term definition and a proofterm 318 definition; returns a (possibly extended) dictionary and proof *) 319 fun parse_definition get_token (tydict, tmdict, proof) = 320 let 321 val _ = Library.expect_token "(" (get_token ()) 322 val _ = Library.expect_token "(" (get_token ()) 323 val name = get_token () 324 val t = SmtLib_Parser.parse_term get_token (tydict, tmdict) 325 val _ = Library.expect_token ")" (get_token ()) 326 val _ = Library.expect_token ")" (get_token ()) 327 in 328 if String.isPrefix "@x" name then 329 (* proofterm definition *) 330 let 331 val tmdict = Library.extend_dict ((name, SmtLib_Theories.K_zero_zero 332 (Term.mk_var (name, pt_ty))), tmdict) 333 val proof = extend_proof proof (proofterm_id name, t) 334 in 335 (tmdict, proof) 336 end 337 else 338 (* term definition *) 339 (Library.extend_dict ((name, SmtLib_Theories.K_zero_zero t), tmdict), 340 proof) 341 end 342 343 (* entry point into the parser (i.e., the grammar's start symbol) *) 344 fun parse_proof get_token (tydict, tmdict, proof) (rpars : int) = 345 let 346 val _ = Library.expect_token "(" (get_token ()) 347 val head = get_token () 348 in 349 if head = "let" then 350 let 351 val (tmdict, proof) = parse_definition get_token (tydict, tmdict, proof) 352 in 353 parse_proof get_token (tydict, tmdict, proof) (rpars + 1) 354 end 355 else if head = "error" then ( 356 (* some (otherwise valid) proofs are preceded by an error message, 357 which we simply ignore *) 358 get_token (); 359 Library.expect_token ")" (get_token ()); 360 parse_proof get_token (tydict, tmdict, proof) rpars 361 ) else 362 let 363 (* undo look-ahead of 2 tokens *) 364 val buffer = ref ["(", head] 365 fun get_token' () = 366 case !buffer of 367 [] => get_token () 368 | x::xs => (buffer := xs; x) 369 val t = SmtLib_Parser.parse_term get_token' (tydict, tmdict) 370 in 371 (* Z3 assigns no ID to the final proof step; we use ID 0 *) 372 extend_proof proof (0, t) before Lib.funpow rpars 373 (fn () => Library.expect_token ")" (get_token ())) () 374 end 375 end 376 377in 378 379 (* Similar to 'parse_file' below, but for instreams. Does not close 380 the instream. *) 381 382 fun parse_stream (tydict : (string, Type.hol_type SmtLib_Parser.parse_fn list) 383 Redblackmap.dict, tmdict : (string, Term.term SmtLib_Parser.parse_fn list) 384 Redblackmap.dict) (instream : TextIO.instream) : proof = 385 let 386 (* union of user-declared names and Z3's inference rule names *) 387 val tmdict = Library.union_dict tmdict z3_builtin_dict 388 (* parse the stream *) 389 val _ = if !Library.trace > 1 then 390 Feedback.HOL_MESG "HolSmtLib: parsing Z3 proof" 391 else () 392 val get_token = Library.get_token (Library.get_buffered_char instream) 393 val proof = parse_proof get_token 394 (tydict, tmdict, Redblackmap.mkDict Int.compare) 0 395 val _ = if !Library.trace > 0 then 396 WARNING "parse_stream" ("ignoring token '" ^ get_token () ^ 397 "' (and perhaps others) after proof") 398 handle Feedback.HOL_ERR _ => () (* end of stream, as expected *) 399 else () 400 in 401 proof 402 end 403 404 (* Function 'parse_file' parses Z3's response to the SMT2 405 (get-proof) command (for an unsatisfiable problem, with proofs 406 enabled in Z3, i.e., using option "PROOF_MODE=2"). It has been 407 tested with proofs generated by Z3 2.13 (which was released in 408 October 2010). 'parse_file' takes three arguments: two 409 dictionaries mapping names of types and terms (namely those 410 declared in the SMT-LIB benchmark) to lists of parsing functions 411 (cf. 'SmtLib_Parser.parse_file'); and the name of the proof 412 file. *) 413 414 fun parse_file (tydict, tmdict) (path : string) : proof = 415 let 416 val instream = TextIO.openIn path 417 in 418 parse_stream (tydict, tmdict) instream 419 before TextIO.closeIn instream 420 end 421 422end (* local *) 423 424end 425