1(* Title: HOL/Tools/sat.ML 2 Author: Stephan Merz and Alwen Tiu, QSL Team, LORIA (http://qsl.loria.fr) 3 Author: Tjark Weber, TU Muenchen 4 5Proof reconstruction from SAT solvers. 6 7 Description: 8 This file defines several tactics to invoke a proof-producing 9 SAT solver on a propositional goal in clausal form. 10 11 We use a sequent presentation of clauses to speed up resolution 12 proof reconstruction. 13 We call such clauses "raw clauses", which are of the form 14 [x1, ..., xn, P] |- False 15 (note the use of |- instead of ==>, i.e. of Isabelle's (meta-)hyps here), 16 where each xi is a literal (see also comments in cnf.ML). 17 18 This does not work for goals containing schematic variables! 19 20 The tactic produces a clause representation of the given goal 21 in DIMACS format and invokes a SAT solver, which should return 22 a proof consisting of a sequence of resolution steps, indicating 23 the two input clauses, and resulting in new clauses, leading to 24 the empty clause (i.e. "False"). The tactic replays this proof 25 in Isabelle and thus solves the overall goal. 26 27 There are three SAT tactics available. They differ in the CNF transformation 28 used. "sat_tac" uses naive CNF transformation to transform the theorem to be 29 proved before giving it to the SAT solver. The naive transformation in the 30 worst case can lead to an exponential blow up in formula size. Another 31 tactic, "satx_tac", uses "definitional CNF transformation" which attempts to 32 produce a formula of linear size increase compared to the input formula, at 33 the cost of possibly introducing new variables. See cnf.ML for more 34 comments on the CNF transformation. "rawsat_tac" should be used with 35 caution: no CNF transformation is performed, and the tactic's behavior is 36 undefined if the subgoal is not already given as [| C1; ...; Cn |] ==> False, 37 where each Ci is a disjunction. 38 39 The SAT solver to be used can be set via the "solver" reference. See 40 sat_solvers.ML for possible values, and etc/settings for required (solver- 41 dependent) configuration settings. To replay SAT proofs in Isabelle, you 42 must of course use a proof-producing SAT solver in the first place. 43 44 Proofs are replayed only if "quick_and_dirty" is false. If 45 "quick_and_dirty" is true, the theorem (in case the SAT solver claims its 46 negation to be unsatisfiable) is proved via an oracle. 47*) 48 49signature SAT = 50sig 51 val trace: bool Config.T (* print trace messages *) 52 val solver: string Config.T (* name of SAT solver to be used *) 53 val counter: int Unsynchronized.ref (* output: number of resolution steps during last proof replay *) 54 val rawsat_thm: Proof.context -> cterm list -> thm 55 val rawsat_tac: Proof.context -> int -> tactic 56 val sat_tac: Proof.context -> int -> tactic 57 val satx_tac: Proof.context -> int -> tactic 58end 59 60structure SAT : SAT = 61struct 62 63val trace = Attrib.setup_config_bool \<^binding>\<open>sat_trace\<close> (K false); 64 65fun cond_tracing ctxt msg = 66 if Config.get ctxt trace then tracing (msg ()) else (); 67 68val solver = Attrib.setup_config_string \<^binding>\<open>sat_solver\<close> (K "cdclite"); 69 (*see HOL/Tools/sat_solver.ML for possible values*) 70 71val counter = Unsynchronized.ref 0; 72 73val resolution_thm = 74 @{lemma "(P \<Longrightarrow> False) \<Longrightarrow> (\<not> P \<Longrightarrow> False) \<Longrightarrow> False" by (rule case_split)} 75 76(* ------------------------------------------------------------------------- *) 77(* lit_ord: an order on integers that considers their absolute values only, *) 78(* thereby treating integers that represent the same atom (positively *) 79(* or negatively) as equal *) 80(* ------------------------------------------------------------------------- *) 81 82fun lit_ord (i, j) = int_ord (abs i, abs j); 83 84(* ------------------------------------------------------------------------- *) 85(* CLAUSE: during proof reconstruction, three kinds of clauses are *) 86(* distinguished: *) 87(* 1. NO_CLAUSE: clause not proved (yet) *) 88(* 2. ORIG_CLAUSE: a clause as it occurs in the original problem *) 89(* 3. RAW_CLAUSE: a raw clause, with additional precomputed information *) 90(* (a mapping from int's to its literals) for faster proof *) 91(* reconstruction *) 92(* ------------------------------------------------------------------------- *) 93 94datatype CLAUSE = 95 NO_CLAUSE 96 | ORIG_CLAUSE of thm 97 | RAW_CLAUSE of thm * (int * cterm) list; 98 99(* ------------------------------------------------------------------------- *) 100(* resolve_raw_clauses: given a non-empty list of raw clauses, we fold *) 101(* resolution over the list (starting with its head), i.e. with two raw *) 102(* clauses *) 103(* [P, x1, ..., a, ..., xn] |- False *) 104(* and *) 105(* [Q, y1, ..., a', ..., ym] |- False *) 106(* (where a and a' are dual to each other), we convert the first clause *) 107(* to *) 108(* [P, x1, ..., xn] |- a ==> False , *) 109(* the second clause to *) 110(* [Q, y1, ..., ym] |- a' ==> False *) 111(* and then perform resolution with *) 112(* [| ?P ==> False; ~?P ==> False |] ==> False *) 113(* to produce *) 114(* [P, Q, x1, ..., xn, y1, ..., ym] |- False *) 115(* Each clause is accompanied with an association list mapping integers *) 116(* (positive for positive literals, negative for negative literals, and *) 117(* the same absolute value for dual literals) to the actual literals as *) 118(* cterms. *) 119(* ------------------------------------------------------------------------- *) 120 121fun resolve_raw_clauses _ [] = 122 raise THM ("Proof reconstruction failed (empty list of resolvents)!", 0, []) 123 | resolve_raw_clauses ctxt (c::cs) = 124 let 125 (* merges two sorted lists wrt. 'lit_ord', suppressing duplicates *) 126 fun merge xs [] = xs 127 | merge [] ys = ys 128 | merge (x :: xs) (y :: ys) = 129 (case lit_ord (apply2 fst (x, y)) of 130 LESS => x :: merge xs (y :: ys) 131 | EQUAL => x :: merge xs ys 132 | GREATER => y :: merge (x :: xs) ys) 133 134 (* find out which two hyps are used in the resolution *) 135 fun find_res_hyps ([], _) _ = 136 raise THM ("Proof reconstruction failed (no literal for resolution)!", 0, []) 137 | find_res_hyps (_, []) _ = 138 raise THM ("Proof reconstruction failed (no literal for resolution)!", 0, []) 139 | find_res_hyps (h1 :: hyps1, h2 :: hyps2) acc = 140 (case lit_ord (apply2 fst (h1, h2)) of 141 LESS => find_res_hyps (hyps1, h2 :: hyps2) (h1 :: acc) 142 | EQUAL => 143 let 144 val (i1, chyp1) = h1 145 val (i2, chyp2) = h2 146 in 147 if i1 = ~ i2 then 148 (i1 < 0, chyp1, chyp2, rev acc @ merge hyps1 hyps2) 149 else (* i1 = i2 *) 150 find_res_hyps (hyps1, hyps2) (h1 :: acc) 151 end 152 | GREATER => find_res_hyps (h1 :: hyps1, hyps2) (h2 :: acc)) 153 154 fun resolution (c1, hyps1) (c2, hyps2) = 155 let 156 val _ = 157 cond_tracing ctxt (fn () => 158 "Resolving clause: " ^ Thm.string_of_thm ctxt c1 ^ 159 " (hyps: " ^ ML_Syntax.print_list (Syntax.string_of_term ctxt) (Thm.hyps_of c1) ^ 160 ")\nwith clause: " ^ Thm.string_of_thm ctxt c2 ^ 161 " (hyps: " ^ ML_Syntax.print_list (Syntax.string_of_term ctxt) (Thm.hyps_of c2) ^ ")") 162 163 (* the two literals used for resolution *) 164 val (hyp1_is_neg, hyp1, hyp2, new_hyps) = find_res_hyps (hyps1, hyps2) [] 165 166 val c1' = Thm.implies_intr hyp1 c1 (* Gamma1 |- hyp1 ==> False *) 167 val c2' = Thm.implies_intr hyp2 c2 (* Gamma2 |- hyp2 ==> False *) 168 169 val res_thm = (* |- (lit ==> False) ==> (~lit ==> False) ==> False *) 170 let 171 val cLit = 172 snd (Thm.dest_comb (if hyp1_is_neg then hyp2 else hyp1)) (* strip Trueprop *) 173 in 174 Thm.instantiate ([], [((("P", 0), HOLogic.boolT), cLit)]) resolution_thm 175 end 176 177 val _ = 178 cond_tracing ctxt 179 (fn () => "Resolution theorem: " ^ Thm.string_of_thm ctxt res_thm) 180 181 (* Gamma1, Gamma2 |- False *) 182 val c_new = 183 Thm.implies_elim 184 (Thm.implies_elim res_thm (if hyp1_is_neg then c2' else c1')) 185 (if hyp1_is_neg then c1' else c2') 186 187 val _ = 188 cond_tracing ctxt (fn () => 189 "Resulting clause: " ^ Thm.string_of_thm ctxt c_new ^ 190 " (hyps: " ^ 191 ML_Syntax.print_list (Syntax.string_of_term ctxt) (Thm.hyps_of c_new) ^ ")") 192 193 val _ = Unsynchronized.inc counter 194 in 195 (c_new, new_hyps) 196 end 197 in 198 fold resolution cs c 199 end; 200 201(* ------------------------------------------------------------------------- *) 202(* replay_proof: replays the resolution proof returned by the SAT solver; *) 203(* cf. SAT_Solver.proof for details of the proof format. Updates the *) 204(* 'clauses' array with derived clauses, and returns the derived clause *) 205(* at index 'empty_id' (which should just be "False" if proof *) 206(* reconstruction was successful, with the used clauses as hyps). *) 207(* 'atom_table' must contain an injective mapping from all atoms that *) 208(* occur (as part of a literal) in 'clauses' to positive integers. *) 209(* ------------------------------------------------------------------------- *) 210 211fun replay_proof ctxt atom_table clauses (clause_table, empty_id) = 212 let 213 fun index_of_literal chyp = 214 (case (HOLogic.dest_Trueprop o Thm.term_of) chyp of 215 (Const (\<^const_name>\<open>Not\<close>, _) $ atom) => 216 SOME (~ (the (Termtab.lookup atom_table atom))) 217 | atom => SOME (the (Termtab.lookup atom_table atom))) 218 handle TERM _ => NONE; (* 'chyp' is not a literal *) 219 220 fun prove_clause id = 221 (case Array.sub (clauses, id) of 222 RAW_CLAUSE clause => clause 223 | ORIG_CLAUSE thm => 224 (* convert the original clause *) 225 let 226 val _ = cond_tracing ctxt (fn () => "Using original clause #" ^ string_of_int id) 227 val raw = CNF.clause2raw_thm ctxt thm 228 val hyps = sort (lit_ord o apply2 fst) (map_filter (fn chyp => 229 Option.map (rpair chyp) (index_of_literal chyp)) (Thm.chyps_of raw)) 230 val clause = (raw, hyps) 231 val _ = Array.update (clauses, id, RAW_CLAUSE clause) 232 in 233 clause 234 end 235 | NO_CLAUSE => 236 (* prove the clause, using information from 'clause_table' *) 237 let 238 val _ = cond_tracing ctxt (fn () => "Proving clause #" ^ string_of_int id ^ " ...") 239 val ids = the (Inttab.lookup clause_table id) 240 val clause = resolve_raw_clauses ctxt (map prove_clause ids) 241 val _ = Array.update (clauses, id, RAW_CLAUSE clause) 242 val _ = 243 cond_tracing ctxt 244 (fn () => "Replay chain successful; clause stored at #" ^ string_of_int id) 245 in 246 clause 247 end) 248 249 val _ = counter := 0 250 val empty_clause = fst (prove_clause empty_id) 251 val _ = 252 cond_tracing ctxt (fn () => 253 "Proof reconstruction successful; " ^ 254 string_of_int (!counter) ^ " resolution step(s) total.") 255 in 256 empty_clause 257 end; 258 259(* ------------------------------------------------------------------------- *) 260(* string_of_prop_formula: return a human-readable string representation of *) 261(* a 'prop_formula' (just for tracing) *) 262(* ------------------------------------------------------------------------- *) 263 264fun string_of_prop_formula Prop_Logic.True = "True" 265 | string_of_prop_formula Prop_Logic.False = "False" 266 | string_of_prop_formula (Prop_Logic.BoolVar i) = "x" ^ string_of_int i 267 | string_of_prop_formula (Prop_Logic.Not fm) = "~" ^ string_of_prop_formula fm 268 | string_of_prop_formula (Prop_Logic.Or (fm1, fm2)) = 269 "(" ^ string_of_prop_formula fm1 ^ " v " ^ string_of_prop_formula fm2 ^ ")" 270 | string_of_prop_formula (Prop_Logic.And (fm1, fm2)) = 271 "(" ^ string_of_prop_formula fm1 ^ " & " ^ string_of_prop_formula fm2 ^ ")"; 272 273(* ------------------------------------------------------------------------- *) 274(* rawsat_thm: run external SAT solver with the given clauses. Reconstructs *) 275(* a proof from the resulting proof trace of the SAT solver. The *) 276(* theorem returned is just "False" (with some of the given clauses as *) 277(* hyps). *) 278(* ------------------------------------------------------------------------- *) 279 280fun rawsat_thm ctxt clauses = 281 let 282 (* remove premises that equal "True" *) 283 val clauses' = filter (fn clause => 284 (not_equal \<^term>\<open>True\<close> o HOLogic.dest_Trueprop o Thm.term_of) clause 285 handle TERM ("dest_Trueprop", _) => true) clauses 286 (* remove non-clausal premises -- of course this shouldn't actually *) 287 (* remove anything as long as 'rawsat_tac' is only called after the *) 288 (* premises have been converted to clauses *) 289 val clauses'' = filter (fn clause => 290 ((CNF.is_clause o HOLogic.dest_Trueprop o Thm.term_of) clause 291 handle TERM ("dest_Trueprop", _) => false) 292 orelse 293 (if Context_Position.is_visible ctxt then 294 warning ("Ignoring non-clausal premise " ^ Syntax.string_of_term ctxt (Thm.term_of clause)) 295 else (); false)) clauses' 296 (* remove trivial clauses -- this is necessary because zChaff removes *) 297 (* trivial clauses during preprocessing, and otherwise our clause *) 298 (* numbering would be off *) 299 val nontrivial_clauses = 300 filter (not o CNF.clause_is_trivial o HOLogic.dest_Trueprop o Thm.term_of) clauses'' 301 (* sort clauses according to the term order -- an optimization, *) 302 (* useful because forming the union of hypotheses, as done by *) 303 (* Conjunction.intr_balanced and fold Thm.weaken below, is quadratic for *) 304 (* terms sorted in descending order, while only linear for terms *) 305 (* sorted in ascending order *) 306 val sorted_clauses = sort Thm.fast_term_ord nontrivial_clauses 307 val _ = 308 cond_tracing ctxt (fn () => 309 "Sorted non-trivial clauses:\n" ^ 310 cat_lines (map (Syntax.string_of_term ctxt o Thm.term_of) sorted_clauses)) 311 (* translate clauses from HOL terms to Prop_Logic.prop_formula *) 312 val (fms, atom_table) = 313 fold_map (Prop_Logic.prop_formula_of_term o HOLogic.dest_Trueprop o Thm.term_of) 314 sorted_clauses Termtab.empty 315 val _ = 316 cond_tracing ctxt 317 (fn () => "Invoking SAT solver on clauses:\n" ^ cat_lines (map string_of_prop_formula fms)) 318 val fm = Prop_Logic.all fms 319 fun make_quick_and_dirty_thm () = 320 let 321 val _ = 322 cond_tracing ctxt 323 (fn () => "quick_and_dirty is set: proof reconstruction skipped, using oracle instead") 324 val False_thm = Skip_Proof.make_thm_cterm \<^cprop>\<open>False\<close> 325 in 326 (* 'fold Thm.weaken (rev sorted_clauses)' is linear, while 'fold *) 327 (* Thm.weaken sorted_clauses' would be quadratic, since we sorted *) 328 (* clauses in ascending order (which is linear for *) 329 (* 'Conjunction.intr_balanced', used below) *) 330 fold Thm.weaken (rev sorted_clauses) False_thm 331 end 332 in 333 case 334 let 335 val the_solver = Config.get ctxt solver 336 val _ = cond_tracing ctxt (fn () => "Invoking solver " ^ the_solver) 337 in SAT_Solver.invoke_solver the_solver fm end 338 of 339 SAT_Solver.UNSATISFIABLE (SOME (clause_table, empty_id)) => 340 (cond_tracing ctxt (fn () => 341 "Proof trace from SAT solver:\n" ^ 342 "clauses: " ^ ML_Syntax.print_list 343 (ML_Syntax.print_pair ML_Syntax.print_int (ML_Syntax.print_list ML_Syntax.print_int)) 344 (Inttab.dest clause_table) ^ "\n" ^ 345 "empty clause: " ^ string_of_int empty_id); 346 if Config.get ctxt quick_and_dirty then 347 make_quick_and_dirty_thm () 348 else 349 let 350 (* optimization: convert the given clauses to "[c_1 && ... && c_n] |- c_i"; *) 351 (* this avoids accumulation of hypotheses during resolution *) 352 (* [c_1, ..., c_n] |- c_1 && ... && c_n *) 353 val clauses_thm = Conjunction.intr_balanced (map Thm.assume sorted_clauses) 354 (* [c_1 && ... && c_n] |- c_1 && ... && c_n *) 355 val cnf_cterm = Thm.cprop_of clauses_thm 356 val cnf_thm = Thm.assume cnf_cterm 357 (* [[c_1 && ... && c_n] |- c_1, ..., [c_1 && ... && c_n] |- c_n] *) 358 val cnf_clauses = Conjunction.elim_balanced (length sorted_clauses) cnf_thm 359 (* initialize the clause array with the given clauses *) 360 val max_idx = fst (the (Inttab.max clause_table)) 361 val clause_arr = Array.array (max_idx + 1, NO_CLAUSE) 362 val _ = 363 fold (fn thm => fn idx => (Array.update (clause_arr, idx, ORIG_CLAUSE thm); idx+1)) 364 cnf_clauses 0 365 (* replay the proof to derive the empty clause *) 366 (* [c_1 && ... && c_n] |- False *) 367 val raw_thm = replay_proof ctxt atom_table clause_arr (clause_table, empty_id) 368 in 369 (* [c_1, ..., c_n] |- False *) 370 Thm.implies_elim (Thm.implies_intr cnf_cterm raw_thm) clauses_thm 371 end) 372 | SAT_Solver.UNSATISFIABLE NONE => 373 if Config.get ctxt quick_and_dirty then 374 (if Context_Position.is_visible ctxt then 375 warning "SAT solver claims the formula to be unsatisfiable, but did not provide a proof" 376 else (); 377 make_quick_and_dirty_thm ()) 378 else 379 raise THM ("SAT solver claims the formula to be unsatisfiable, but did not provide a proof", 0, []) 380 | SAT_Solver.SATISFIABLE assignment => 381 let 382 val msg = 383 "SAT solver found a countermodel:\n" ^ 384 (commas o map (fn (term, idx) => 385 Syntax.string_of_term_global \<^theory> term ^ ": " ^ 386 (case assignment idx of NONE => "arbitrary" 387 | SOME true => "true" | SOME false => "false"))) 388 (Termtab.dest atom_table) 389 in 390 raise THM (msg, 0, []) 391 end 392 | SAT_Solver.UNKNOWN => 393 raise THM ("SAT solver failed to decide the formula", 0, []) 394 end; 395 396(* ------------------------------------------------------------------------- *) 397(* Tactics *) 398(* ------------------------------------------------------------------------- *) 399 400(* ------------------------------------------------------------------------- *) 401(* rawsat_tac: solves the i-th subgoal of the proof state; this subgoal *) 402(* should be of the form *) 403(* [| c1; c2; ...; ck |] ==> False *) 404(* where each cj is a non-empty clause (i.e. a disjunction of literals) *) 405(* or "True" *) 406(* ------------------------------------------------------------------------- *) 407 408fun rawsat_tac ctxt i = 409 Subgoal.FOCUS (fn {context = ctxt', prems, ...} => 410 resolve_tac ctxt' [rawsat_thm ctxt' (map Thm.cprop_of prems)] 1) ctxt i; 411 412(* ------------------------------------------------------------------------- *) 413(* pre_cnf_tac: converts the i-th subgoal *) 414(* [| A1 ; ... ; An |] ==> B *) 415(* to *) 416(* [| A1; ... ; An ; ~B |] ==> False *) 417(* (handling meta-logical connectives in B properly before negating), *) 418(* then replaces meta-logical connectives in the premises (i.e. "==>", *) 419(* "!!" and "==") by connectives of the HOL object-logic (i.e. by *) 420(* "-->", "!", and "="), then performs beta-eta-normalization on the *) 421(* subgoal *) 422(* ------------------------------------------------------------------------- *) 423 424fun pre_cnf_tac ctxt = 425 resolve_tac ctxt @{thms ccontr} THEN' 426 Object_Logic.atomize_prems_tac ctxt THEN' 427 CONVERSION Drule.beta_eta_conversion; 428 429(* ------------------------------------------------------------------------- *) 430(* cnfsat_tac: checks if the empty clause "False" occurs among the premises; *) 431(* if not, eliminates conjunctions (i.e. each clause of the CNF formula *) 432(* becomes a separate premise), then applies 'rawsat_tac' to solve the *) 433(* subgoal *) 434(* ------------------------------------------------------------------------- *) 435 436fun cnfsat_tac ctxt i = 437 (eresolve_tac ctxt [FalseE] i) ORELSE 438 (REPEAT_DETERM (eresolve_tac ctxt [conjE] i) THEN rawsat_tac ctxt i); 439 440(* ------------------------------------------------------------------------- *) 441(* cnfxsat_tac: checks if the empty clause "False" occurs among the *) 442(* premises; if not, eliminates conjunctions (i.e. each clause of the *) 443(* CNF formula becomes a separate premise) and existential quantifiers, *) 444(* then applies 'rawsat_tac' to solve the subgoal *) 445(* ------------------------------------------------------------------------- *) 446 447fun cnfxsat_tac ctxt i = 448 (eresolve_tac ctxt [FalseE] i) ORELSE 449 (REPEAT_DETERM (eresolve_tac ctxt [conjE] i ORELSE eresolve_tac ctxt [exE] i) THEN 450 rawsat_tac ctxt i); 451 452(* ------------------------------------------------------------------------- *) 453(* sat_tac: tactic for calling an external SAT solver, taking as input an *) 454(* arbitrary formula. The input is translated to CNF, possibly causing *) 455(* an exponential blowup. *) 456(* ------------------------------------------------------------------------- *) 457 458fun sat_tac ctxt i = 459 pre_cnf_tac ctxt i THEN CNF.cnf_rewrite_tac ctxt i THEN cnfsat_tac ctxt i; 460 461(* ------------------------------------------------------------------------- *) 462(* satx_tac: tactic for calling an external SAT solver, taking as input an *) 463(* arbitrary formula. The input is translated to CNF, possibly *) 464(* introducing new literals. *) 465(* ------------------------------------------------------------------------- *) 466 467fun satx_tac ctxt i = 468 pre_cnf_tac ctxt i THEN CNF.cnfx_rewrite_tac ctxt i THEN cnfxsat_tac ctxt i; 469 470end; 471