1(*  Title:      HOL/Tools/sat.ML
2    Author:     Stephan Merz and Alwen Tiu, QSL Team, LORIA (http://qsl.loria.fr)
3    Author:     Tjark Weber, TU Muenchen
4
5Proof reconstruction from SAT solvers.
6
7  Description:
8    This file defines several tactics to invoke a proof-producing
9    SAT solver on a propositional goal in clausal form.
10
11    We use a sequent presentation of clauses to speed up resolution
12    proof reconstruction.
13    We call such clauses "raw clauses", which are of the form
14          [x1, ..., xn, P] |- False
15    (note the use of |- instead of ==>, i.e. of Isabelle's (meta-)hyps here),
16    where each xi is a literal (see also comments in cnf.ML).
17
18    This does not work for goals containing schematic variables!
19
20      The tactic produces a clause representation of the given goal
21      in DIMACS format and invokes a SAT solver, which should return
22      a proof consisting of a sequence of resolution steps, indicating
23      the two input clauses, and resulting in new clauses, leading to
24      the empty clause (i.e. "False").  The tactic replays this proof
25      in Isabelle and thus solves the overall goal.
26
27  There are three SAT tactics available.  They differ in the CNF transformation
28  used. "sat_tac" uses naive CNF transformation to transform the theorem to be
29  proved before giving it to the SAT solver.  The naive transformation in the
30  worst case can lead to an exponential blow up in formula size.  Another
31  tactic, "satx_tac", uses "definitional CNF transformation" which attempts to
32  produce a formula of linear size increase compared to the input formula, at
33  the cost of possibly introducing new variables.  See cnf.ML for more
34  comments on the CNF transformation.  "rawsat_tac" should be used with
35  caution: no CNF transformation is performed, and the tactic's behavior is
36  undefined if the subgoal is not already given as [| C1; ...; Cn |] ==> False,
37  where each Ci is a disjunction.
38
39  The SAT solver to be used can be set via the "solver" reference.  See
40  sat_solvers.ML for possible values, and etc/settings for required (solver-
41  dependent) configuration settings.  To replay SAT proofs in Isabelle, you
42  must of course use a proof-producing SAT solver in the first place.
43
44  Proofs are replayed only if "quick_and_dirty" is false.  If
45  "quick_and_dirty" is true, the theorem (in case the SAT solver claims its
46  negation to be unsatisfiable) is proved via an oracle.
47*)
48
49signature SAT =
50sig
51  val trace: bool Config.T  (* print trace messages *)
52  val solver: string Config.T  (* name of SAT solver to be used *)
53  val counter: int Unsynchronized.ref     (* output: number of resolution steps during last proof replay *)
54  val rawsat_thm: Proof.context -> cterm list -> thm
55  val rawsat_tac: Proof.context -> int -> tactic
56  val sat_tac: Proof.context -> int -> tactic
57  val satx_tac: Proof.context -> int -> tactic
58end
59
60structure SAT : SAT =
61struct
62
63val trace = Attrib.setup_config_bool \<^binding>\<open>sat_trace\<close> (K false);
64
65fun cond_tracing ctxt msg =
66  if Config.get ctxt trace then tracing (msg ()) else ();
67
68val solver = Attrib.setup_config_string \<^binding>\<open>sat_solver\<close> (K "cdclite");
69  (*see HOL/Tools/sat_solver.ML for possible values*)
70
71val counter = Unsynchronized.ref 0;
72
73val resolution_thm =
74  @{lemma "(P \<Longrightarrow> False) \<Longrightarrow> (\<not> P \<Longrightarrow> False) \<Longrightarrow> False" by (rule case_split)}
75
76(* ------------------------------------------------------------------------- *)
77(* lit_ord: an order on integers that considers their absolute values only,  *)
78(*      thereby treating integers that represent the same atom (positively   *)
79(*      or negatively) as equal                                              *)
80(* ------------------------------------------------------------------------- *)
81
82fun lit_ord (i, j) = int_ord (abs i, abs j);
83
84(* ------------------------------------------------------------------------- *)
85(* CLAUSE: during proof reconstruction, three kinds of clauses are           *)
86(*      distinguished:                                                       *)
87(*      1. NO_CLAUSE: clause not proved (yet)                                *)
88(*      2. ORIG_CLAUSE: a clause as it occurs in the original problem        *)
89(*      3. RAW_CLAUSE: a raw clause, with additional precomputed information *)
90(*         (a mapping from int's to its literals) for faster proof           *)
91(*         reconstruction                                                    *)
92(* ------------------------------------------------------------------------- *)
93
94datatype CLAUSE =
95    NO_CLAUSE
96  | ORIG_CLAUSE of thm
97  | RAW_CLAUSE of thm * (int * cterm) list;
98
99(* ------------------------------------------------------------------------- *)
100(* resolve_raw_clauses: given a non-empty list of raw clauses, we fold       *)
101(*      resolution over the list (starting with its head), i.e. with two raw *)
102(*      clauses                                                              *)
103(*        [P, x1, ..., a, ..., xn] |- False                                  *)
104(*      and                                                                  *)
105(*        [Q, y1, ..., a', ..., ym] |- False                                 *)
106(*      (where a and a' are dual to each other), we convert the first clause *)
107(*      to                                                                   *)
108(*        [P, x1, ..., xn] |- a ==> False ,                                  *)
109(*      the second clause to                                                 *)
110(*        [Q, y1, ..., ym] |- a' ==> False                                   *)
111(*      and then perform resolution with                                     *)
112(*        [| ?P ==> False; ~?P ==> False |] ==> False                        *)
113(*      to produce                                                           *)
114(*        [P, Q, x1, ..., xn, y1, ..., ym] |- False                          *)
115(*      Each clause is accompanied with an association list mapping integers *)
116(*      (positive for positive literals, negative for negative literals, and *)
117(*      the same absolute value for dual literals) to the actual literals as *)
118(*      cterms.                                                              *)
119(* ------------------------------------------------------------------------- *)
120
121fun resolve_raw_clauses _ [] =
122      raise THM ("Proof reconstruction failed (empty list of resolvents)!", 0, [])
123  | resolve_raw_clauses ctxt (c::cs) =
124      let
125        (* merges two sorted lists wrt. 'lit_ord', suppressing duplicates *)
126        fun merge xs [] = xs
127          | merge [] ys = ys
128          | merge (x :: xs) (y :: ys) =
129              (case lit_ord (apply2 fst (x, y)) of
130                LESS => x :: merge xs (y :: ys)
131              | EQUAL => x :: merge xs ys
132              | GREATER => y :: merge (x :: xs) ys)
133
134        (* find out which two hyps are used in the resolution *)
135        fun find_res_hyps ([], _) _ =
136              raise THM ("Proof reconstruction failed (no literal for resolution)!", 0, [])
137          | find_res_hyps (_, []) _ =
138              raise THM ("Proof reconstruction failed (no literal for resolution)!", 0, [])
139          | find_res_hyps (h1 :: hyps1, h2 :: hyps2) acc =
140              (case lit_ord  (apply2 fst (h1, h2)) of
141                LESS  => find_res_hyps (hyps1, h2 :: hyps2) (h1 :: acc)
142              | EQUAL =>
143                  let
144                    val (i1, chyp1) = h1
145                    val (i2, chyp2) = h2
146                  in
147                    if i1 = ~ i2 then
148                      (i1 < 0, chyp1, chyp2, rev acc @ merge hyps1 hyps2)
149                    else (* i1 = i2 *)
150                      find_res_hyps (hyps1, hyps2) (h1 :: acc)
151                  end
152          | GREATER => find_res_hyps (h1 :: hyps1, hyps2) (h2 :: acc))
153
154        fun resolution (c1, hyps1) (c2, hyps2) =
155          let
156            val _ =
157              cond_tracing ctxt (fn () =>
158                "Resolving clause: " ^ Thm.string_of_thm ctxt c1 ^
159                " (hyps: " ^ ML_Syntax.print_list (Syntax.string_of_term ctxt) (Thm.hyps_of c1) ^
160                ")\nwith clause: " ^ Thm.string_of_thm ctxt c2 ^
161                " (hyps: " ^ ML_Syntax.print_list (Syntax.string_of_term ctxt) (Thm.hyps_of c2) ^ ")")
162
163            (* the two literals used for resolution *)
164            val (hyp1_is_neg, hyp1, hyp2, new_hyps) = find_res_hyps (hyps1, hyps2) []
165
166            val c1' = Thm.implies_intr hyp1 c1  (* Gamma1 |- hyp1 ==> False *)
167            val c2' = Thm.implies_intr hyp2 c2  (* Gamma2 |- hyp2 ==> False *)
168
169            val res_thm =  (* |- (lit ==> False) ==> (~lit ==> False) ==> False *)
170              let
171                val cLit =
172                  snd (Thm.dest_comb (if hyp1_is_neg then hyp2 else hyp1))  (* strip Trueprop *)
173              in
174                Thm.instantiate ([], [((("P", 0), HOLogic.boolT), cLit)]) resolution_thm
175              end
176
177            val _ =
178              cond_tracing ctxt
179                (fn () => "Resolution theorem: " ^ Thm.string_of_thm ctxt res_thm)
180
181            (* Gamma1, Gamma2 |- False *)
182            val c_new =
183              Thm.implies_elim
184                (Thm.implies_elim res_thm (if hyp1_is_neg then c2' else c1'))
185                (if hyp1_is_neg then c1' else c2')
186
187            val _ =
188              cond_tracing ctxt (fn () =>
189                "Resulting clause: " ^ Thm.string_of_thm ctxt c_new ^
190                " (hyps: " ^
191                ML_Syntax.print_list (Syntax.string_of_term ctxt) (Thm.hyps_of c_new) ^ ")")
192
193            val _ = Unsynchronized.inc counter
194          in
195            (c_new, new_hyps)
196          end
197        in
198          fold resolution cs c
199        end;
200
201(* ------------------------------------------------------------------------- *)
202(* replay_proof: replays the resolution proof returned by the SAT solver;    *)
203(*      cf. SAT_Solver.proof for details of the proof format.  Updates the   *)
204(*      'clauses' array with derived clauses, and returns the derived clause *)
205(*      at index 'empty_id' (which should just be "False" if proof           *)
206(*      reconstruction was successful, with the used clauses as hyps).       *)
207(*      'atom_table' must contain an injective mapping from all atoms that   *)
208(*      occur (as part of a literal) in 'clauses' to positive integers.      *)
209(* ------------------------------------------------------------------------- *)
210
211fun replay_proof ctxt atom_table clauses (clause_table, empty_id) =
212  let
213    fun index_of_literal chyp =
214      (case (HOLogic.dest_Trueprop o Thm.term_of) chyp of
215        (Const (\<^const_name>\<open>Not\<close>, _) $ atom) =>
216          SOME (~ (the (Termtab.lookup atom_table atom)))
217      | atom => SOME (the (Termtab.lookup atom_table atom)))
218      handle TERM _ => NONE;  (* 'chyp' is not a literal *)
219
220    fun prove_clause id =
221      (case Array.sub (clauses, id) of
222        RAW_CLAUSE clause => clause
223      | ORIG_CLAUSE thm =>
224          (* convert the original clause *)
225          let
226            val _ = cond_tracing ctxt (fn () => "Using original clause #" ^ string_of_int id)
227            val raw = CNF.clause2raw_thm ctxt thm
228            val hyps = sort (lit_ord o apply2 fst) (map_filter (fn chyp =>
229              Option.map (rpair chyp) (index_of_literal chyp)) (Thm.chyps_of raw))
230            val clause = (raw, hyps)
231            val _ = Array.update (clauses, id, RAW_CLAUSE clause)
232          in
233            clause
234          end
235      | NO_CLAUSE =>
236          (* prove the clause, using information from 'clause_table' *)
237          let
238            val _ = cond_tracing ctxt (fn () => "Proving clause #" ^ string_of_int id ^ " ...")
239            val ids = the (Inttab.lookup clause_table id)
240            val clause = resolve_raw_clauses ctxt (map prove_clause ids)
241            val _ = Array.update (clauses, id, RAW_CLAUSE clause)
242            val _ =
243              cond_tracing ctxt
244                (fn () => "Replay chain successful; clause stored at #" ^ string_of_int id)
245          in
246            clause
247          end)
248
249    val _ = counter := 0
250    val empty_clause = fst (prove_clause empty_id)
251    val _ =
252      cond_tracing ctxt (fn () =>
253        "Proof reconstruction successful; " ^
254        string_of_int (!counter) ^ " resolution step(s) total.")
255  in
256    empty_clause
257  end;
258
259(* ------------------------------------------------------------------------- *)
260(* string_of_prop_formula: return a human-readable string representation of  *)
261(*      a 'prop_formula' (just for tracing)                                  *)
262(* ------------------------------------------------------------------------- *)
263
264fun string_of_prop_formula Prop_Logic.True = "True"
265  | string_of_prop_formula Prop_Logic.False = "False"
266  | string_of_prop_formula (Prop_Logic.BoolVar i) = "x" ^ string_of_int i
267  | string_of_prop_formula (Prop_Logic.Not fm) = "~" ^ string_of_prop_formula fm
268  | string_of_prop_formula (Prop_Logic.Or (fm1, fm2)) =
269      "(" ^ string_of_prop_formula fm1 ^ " v " ^ string_of_prop_formula fm2 ^ ")"
270  | string_of_prop_formula (Prop_Logic.And (fm1, fm2)) =
271      "(" ^ string_of_prop_formula fm1 ^ " & " ^ string_of_prop_formula fm2 ^ ")";
272
273(* ------------------------------------------------------------------------- *)
274(* rawsat_thm: run external SAT solver with the given clauses.  Reconstructs *)
275(*      a proof from the resulting proof trace of the SAT solver.  The       *)
276(*      theorem returned is just "False" (with some of the given clauses as  *)
277(*      hyps).                                                               *)
278(* ------------------------------------------------------------------------- *)
279
280fun rawsat_thm ctxt clauses =
281  let
282    (* remove premises that equal "True" *)
283    val clauses' = filter (fn clause =>
284      (not_equal \<^term>\<open>True\<close> o HOLogic.dest_Trueprop o Thm.term_of) clause
285        handle TERM ("dest_Trueprop", _) => true) clauses
286    (* remove non-clausal premises -- of course this shouldn't actually   *)
287    (* remove anything as long as 'rawsat_tac' is only called after the   *)
288    (* premises have been converted to clauses                            *)
289    val clauses'' = filter (fn clause =>
290      ((CNF.is_clause o HOLogic.dest_Trueprop o Thm.term_of) clause
291        handle TERM ("dest_Trueprop", _) => false)
292      orelse
293       (if Context_Position.is_visible ctxt then
294          warning ("Ignoring non-clausal premise " ^ Syntax.string_of_term ctxt (Thm.term_of clause))
295        else (); false)) clauses'
296    (* remove trivial clauses -- this is necessary because zChaff removes *)
297    (* trivial clauses during preprocessing, and otherwise our clause     *)
298    (* numbering would be off                                             *)
299    val nontrivial_clauses =
300      filter (not o CNF.clause_is_trivial o HOLogic.dest_Trueprop o Thm.term_of) clauses''
301    (* sort clauses according to the term order -- an optimization,       *)
302    (* useful because forming the union of hypotheses, as done by         *)
303    (* Conjunction.intr_balanced and fold Thm.weaken below, is quadratic for *)
304    (* terms sorted in descending order, while only linear for terms      *)
305    (* sorted in ascending order                                          *)
306    val sorted_clauses = sort Thm.fast_term_ord nontrivial_clauses
307    val _ =
308      cond_tracing ctxt (fn () =>
309        "Sorted non-trivial clauses:\n" ^
310        cat_lines (map (Syntax.string_of_term ctxt o Thm.term_of) sorted_clauses))
311    (* translate clauses from HOL terms to Prop_Logic.prop_formula *)
312    val (fms, atom_table) =
313      fold_map (Prop_Logic.prop_formula_of_term o HOLogic.dest_Trueprop o Thm.term_of)
314        sorted_clauses Termtab.empty
315    val _ =
316      cond_tracing ctxt
317        (fn () => "Invoking SAT solver on clauses:\n" ^ cat_lines (map string_of_prop_formula fms))
318    val fm = Prop_Logic.all fms
319    fun make_quick_and_dirty_thm () =
320      let
321        val _ =
322          cond_tracing ctxt
323            (fn () => "quick_and_dirty is set: proof reconstruction skipped, using oracle instead")
324        val False_thm = Skip_Proof.make_thm_cterm \<^cprop>\<open>False\<close>
325      in
326        (* 'fold Thm.weaken (rev sorted_clauses)' is linear, while 'fold    *)
327        (* Thm.weaken sorted_clauses' would be quadratic, since we sorted   *)
328        (* clauses in ascending order (which is linear for                  *)
329        (* 'Conjunction.intr_balanced', used below)                         *)
330        fold Thm.weaken (rev sorted_clauses) False_thm
331      end
332  in
333    case
334      let
335        val the_solver = Config.get ctxt solver
336        val _ = cond_tracing ctxt (fn () => "Invoking solver " ^ the_solver)
337      in SAT_Solver.invoke_solver the_solver fm end
338    of
339      SAT_Solver.UNSATISFIABLE (SOME (clause_table, empty_id)) =>
340       (cond_tracing ctxt (fn () =>
341          "Proof trace from SAT solver:\n" ^
342          "clauses: " ^ ML_Syntax.print_list
343            (ML_Syntax.print_pair ML_Syntax.print_int (ML_Syntax.print_list ML_Syntax.print_int))
344            (Inttab.dest clause_table) ^ "\n" ^
345          "empty clause: " ^ string_of_int empty_id);
346        if Config.get ctxt quick_and_dirty then
347          make_quick_and_dirty_thm ()
348        else
349          let
350            (* optimization: convert the given clauses to "[c_1 && ... && c_n] |- c_i";  *)
351            (*               this avoids accumulation of hypotheses during resolution    *)
352            (* [c_1, ..., c_n] |- c_1 && ... && c_n *)
353            val clauses_thm = Conjunction.intr_balanced (map Thm.assume sorted_clauses)
354            (* [c_1 && ... && c_n] |- c_1 && ... && c_n *)
355            val cnf_cterm = Thm.cprop_of clauses_thm
356            val cnf_thm = Thm.assume cnf_cterm
357            (* [[c_1 && ... && c_n] |- c_1, ..., [c_1 && ... && c_n] |- c_n] *)
358            val cnf_clauses = Conjunction.elim_balanced (length sorted_clauses) cnf_thm
359            (* initialize the clause array with the given clauses *)
360            val max_idx = fst (the (Inttab.max clause_table))
361            val clause_arr = Array.array (max_idx + 1, NO_CLAUSE)
362            val _ =
363              fold (fn thm => fn idx => (Array.update (clause_arr, idx, ORIG_CLAUSE thm); idx+1))
364                cnf_clauses 0
365            (* replay the proof to derive the empty clause *)
366            (* [c_1 && ... && c_n] |- False *)
367            val raw_thm = replay_proof ctxt atom_table clause_arr (clause_table, empty_id)
368          in
369            (* [c_1, ..., c_n] |- False *)
370            Thm.implies_elim (Thm.implies_intr cnf_cterm raw_thm) clauses_thm
371          end)
372    | SAT_Solver.UNSATISFIABLE NONE =>
373        if Config.get ctxt quick_and_dirty then
374         (if Context_Position.is_visible ctxt then
375            warning "SAT solver claims the formula to be unsatisfiable, but did not provide a proof"
376          else ();
377          make_quick_and_dirty_thm ())
378        else
379          raise THM ("SAT solver claims the formula to be unsatisfiable, but did not provide a proof", 0, [])
380    | SAT_Solver.SATISFIABLE assignment =>
381        let
382          val msg =
383            "SAT solver found a countermodel:\n" ^
384            (commas o map (fn (term, idx) =>
385                Syntax.string_of_term_global \<^theory> term ^ ": " ^
386                  (case assignment idx of NONE => "arbitrary"
387                  | SOME true => "true" | SOME false => "false")))
388              (Termtab.dest atom_table)
389        in
390          raise THM (msg, 0, [])
391        end
392    | SAT_Solver.UNKNOWN =>
393        raise THM ("SAT solver failed to decide the formula", 0, [])
394  end;
395
396(* ------------------------------------------------------------------------- *)
397(* Tactics                                                                   *)
398(* ------------------------------------------------------------------------- *)
399
400(* ------------------------------------------------------------------------- *)
401(* rawsat_tac: solves the i-th subgoal of the proof state; this subgoal      *)
402(*      should be of the form                                                *)
403(*        [| c1; c2; ...; ck |] ==> False                                    *)
404(*      where each cj is a non-empty clause (i.e. a disjunction of literals) *)
405(*      or "True"                                                            *)
406(* ------------------------------------------------------------------------- *)
407
408fun rawsat_tac ctxt i =
409  Subgoal.FOCUS (fn {context = ctxt', prems, ...} =>
410    resolve_tac ctxt' [rawsat_thm ctxt' (map Thm.cprop_of prems)] 1) ctxt i;
411
412(* ------------------------------------------------------------------------- *)
413(* pre_cnf_tac: converts the i-th subgoal                                    *)
414(*        [| A1 ; ... ; An |] ==> B                                          *)
415(*      to                                                                   *)
416(*        [| A1; ... ; An ; ~B |] ==> False                                  *)
417(*      (handling meta-logical connectives in B properly before negating),   *)
418(*      then replaces meta-logical connectives in the premises (i.e. "==>",  *)
419(*      "!!" and "==") by connectives of the HOL object-logic (i.e. by       *)
420(*      "-->", "!", and "="), then performs beta-eta-normalization on the    *)
421(*      subgoal                                                              *)
422(* ------------------------------------------------------------------------- *)
423
424fun pre_cnf_tac ctxt =
425  resolve_tac ctxt @{thms ccontr} THEN'
426  Object_Logic.atomize_prems_tac ctxt THEN'
427  CONVERSION Drule.beta_eta_conversion;
428
429(* ------------------------------------------------------------------------- *)
430(* cnfsat_tac: checks if the empty clause "False" occurs among the premises; *)
431(*      if not, eliminates conjunctions (i.e. each clause of the CNF formula *)
432(*      becomes a separate premise), then applies 'rawsat_tac' to solve the  *)
433(*      subgoal                                                              *)
434(* ------------------------------------------------------------------------- *)
435
436fun cnfsat_tac ctxt i =
437  (eresolve_tac  ctxt [FalseE] i) ORELSE
438  (REPEAT_DETERM (eresolve_tac ctxt [conjE] i) THEN rawsat_tac ctxt i);
439
440(* ------------------------------------------------------------------------- *)
441(* cnfxsat_tac: checks if the empty clause "False" occurs among the          *)
442(*      premises; if not, eliminates conjunctions (i.e. each clause of the   *)
443(*      CNF formula becomes a separate premise) and existential quantifiers, *)
444(*      then applies 'rawsat_tac' to solve the subgoal                       *)
445(* ------------------------------------------------------------------------- *)
446
447fun cnfxsat_tac ctxt i =
448  (eresolve_tac ctxt [FalseE] i) ORELSE
449    (REPEAT_DETERM (eresolve_tac ctxt [conjE] i ORELSE eresolve_tac ctxt [exE] i) THEN
450      rawsat_tac ctxt i);
451
452(* ------------------------------------------------------------------------- *)
453(* sat_tac: tactic for calling an external SAT solver, taking as input an    *)
454(*      arbitrary formula.  The input is translated to CNF, possibly causing *)
455(*      an exponential blowup.                                               *)
456(* ------------------------------------------------------------------------- *)
457
458fun sat_tac ctxt i =
459  pre_cnf_tac ctxt i THEN CNF.cnf_rewrite_tac ctxt i THEN cnfsat_tac ctxt i;
460
461(* ------------------------------------------------------------------------- *)
462(* satx_tac: tactic for calling an external SAT solver, taking as input an   *)
463(*      arbitrary formula.  The input is translated to CNF, possibly         *)
464(*      introducing new literals.                                            *)
465(* ------------------------------------------------------------------------- *)
466
467fun satx_tac ctxt i =
468  pre_cnf_tac ctxt i THEN CNF.cnfx_rewrite_tac ctxt i THEN cnfxsat_tac ctxt i;
469
470end;
471