1(* Title: HOL/Tools/Sledgehammer/sledgehammer_prover_atp.ML 2 Author: Fabian Immler, TU Muenchen 3 Author: Makarius 4 Author: Jasmin Blanchette, TU Muenchen 5 6ATPs as Sledgehammer provers. 7*) 8 9signature SLEDGEHAMMER_PROVER_ATP = 10sig 11 type mode = Sledgehammer_Prover.mode 12 type prover = Sledgehammer_Prover.prover 13 14 val atp_dest_dir : string Config.T 15 val atp_problem_prefix : string Config.T 16 val atp_completish : int Config.T 17 val atp_full_names : bool Config.T 18 19 val is_ho_atp : Proof.context -> string -> bool 20 21 val run_atp : mode -> string -> prover 22end; 23 24structure Sledgehammer_Prover_ATP : SLEDGEHAMMER_PROVER_ATP = 25struct 26 27open ATP_Util 28open ATP_Problem 29open ATP_Proof 30open ATP_Problem_Generate 31open ATP_Proof_Reconstruct 32open ATP_Satallax 33open ATP_Systems 34open Sledgehammer_Util 35open Sledgehammer_Proof_Methods 36open Sledgehammer_Isar 37open Sledgehammer_Prover 38 39(* Empty string means create files in Isabelle's temporary files directory. *) 40val atp_dest_dir = Attrib.setup_config_string \<^binding>\<open>sledgehammer_atp_dest_dir\<close> (K "") 41val atp_problem_prefix = 42 Attrib.setup_config_string \<^binding>\<open>sledgehammer_atp_problem_prefix\<close> (K "prob") 43val atp_completish = Attrib.setup_config_int \<^binding>\<open>sledgehammer_atp_completish\<close> (K 0) 44(* In addition to being easier to read, readable names are often much shorter, especially if types 45 are mangled in names. This makes a difference for some provers (e.g., E). For these reason, short 46 names are enabled by default. *) 47val atp_full_names = Attrib.setup_config_bool \<^binding>\<open>sledgehammer_atp_full_names\<close> (K false) 48 49fun is_atp_of_format is_format ctxt name = 50 let val thy = Proof_Context.theory_of ctxt in 51 (case try (get_atp thy) name of 52 SOME config => 53 exists (fn (_, ((_, format, _, _, _), _)) => is_format format) (#best_slices (config ()) ctxt) 54 | NONE => false) 55 end 56 57val is_ho_atp = is_atp_of_format is_format_higher_order 58 59fun choose_type_enc strictness best_type_enc format = 60 the_default best_type_enc 61 #> type_enc_of_string strictness 62 #> adjust_type_enc format 63 64fun has_bound_or_var_of_type pred = 65 exists_subterm (fn Var (_, T as Type _) => pred T 66 | Abs (_, T as Type _, _) => pred T 67 | _ => false) 68 69(* Unwanted equalities are those between a (bound or schematic) variable that does not properly 70 occur in the second operand. *) 71val is_exhaustive_finite = 72 let 73 fun is_bad_equal (Var z) t = 74 not (exists_subterm (fn Var z' => z = z' | _ => false) t) 75 | is_bad_equal (Bound j) t = not (loose_bvar1 (t, j)) 76 | is_bad_equal _ _ = false 77 fun do_equals t1 t2 = is_bad_equal t1 t2 orelse is_bad_equal t2 t1 78 fun do_formula pos t = 79 (case (pos, t) of 80 (_, \<^const>\<open>Trueprop\<close> $ t1) => do_formula pos t1 81 | (true, Const (\<^const_name>\<open>Pure.all\<close>, _) $ Abs (_, _, t')) => do_formula pos t' 82 | (true, Const (\<^const_name>\<open>All\<close>, _) $ Abs (_, _, t')) => do_formula pos t' 83 | (false, Const (\<^const_name>\<open>Ex\<close>, _) $ Abs (_, _, t')) => do_formula pos t' 84 | (_, \<^const>\<open>Pure.imp\<close> $ t1 $ t2) => 85 do_formula (not pos) t1 andalso (t2 = \<^prop>\<open>False\<close> orelse do_formula pos t2) 86 | (_, \<^const>\<open>HOL.implies\<close> $ t1 $ t2) => 87 do_formula (not pos) t1 andalso (t2 = \<^const>\<open>False\<close> orelse do_formula pos t2) 88 | (_, \<^const>\<open>Not\<close> $ t1) => do_formula (not pos) t1 89 | (true, \<^const>\<open>HOL.disj\<close> $ t1 $ t2) => forall (do_formula pos) [t1, t2] 90 | (false, \<^const>\<open>HOL.conj\<close> $ t1 $ t2) => forall (do_formula pos) [t1, t2] 91 | (true, Const (\<^const_name>\<open>HOL.eq\<close>, _) $ t1 $ t2) => do_equals t1 t2 92 | (true, Const (\<^const_name>\<open>Pure.eq\<close>, _) $ t1 $ t2) => do_equals t1 t2 93 | _ => false) 94 in do_formula true end 95 96(* Facts containing variables of finite types such as "unit" or "bool" or of the form 97 "ALL x. x = A | x = B | x = C" are likely to lead to untypable proofs for unsound type 98 encodings. *) 99fun is_dangerous_prop ctxt = 100 transform_elim_prop 101 #> (has_bound_or_var_of_type (is_type_surely_finite ctxt) orf is_exhaustive_finite) 102 103fun get_slices slice slices = 104 (0 upto length slices - 1) ~~ slices |> not slice ? (List.last #> single) 105 106fun get_facts_of_filter _ [(_, facts)] = facts 107 | get_facts_of_filter fact_filter factss = 108 (case AList.lookup (op =) factss fact_filter of 109 SOME facts => facts 110 | NONE => snd (hd factss)) 111 112(* For low values of "max_facts", this fudge value ensures that most slices are invoked with a 113 nontrivial amount of facts. *) 114val max_fact_factor_fudge = 5 115 116val mono_max_privileged_facts = 10 117 118fun suffix_of_mode Auto_Try = "_try" 119 | suffix_of_mode Try = "_try" 120 | suffix_of_mode Normal = "" 121 | suffix_of_mode MaSh = "" 122 | suffix_of_mode Minimize = "_min" 123 124(* Give the ATPs some slack before interrupting them the hard way. "z3_tptp" on Linux appears to be 125 the only ATP that does not honor its time limit. *) 126val atp_timeout_slack = seconds 1.0 127 128(* Important messages are important but not so important that users want to see them each time. *) 129val atp_important_message_keep_quotient = 25 130 131fun run_atp mode name 132 ({debug, verbose, overlord, type_enc, strict, lam_trans, uncurried_aliases, fact_filter, 133 max_facts, max_mono_iters, max_new_mono_instances, isar_proofs, compress, try0, smt_proofs, 134 slice, minimize, timeout, preplay_timeout, ...} : params) 135 ({comment, state, goal, subgoal, subgoal_count, factss, found_proof, ...} : prover_problem) = 136 let 137 val thy = Proof.theory_of state 138 val ctxt = Proof.context_of state 139 140 val {exec, arguments, proof_delims, known_failures, prem_role, best_slices, best_max_mono_iters, 141 best_max_new_mono_instances, ...} = get_atp thy name () 142 143 val full_proofs = isar_proofs |> the_default (mode = Minimize) 144 val local_name = perhaps (try (unprefix remote_prefix)) name 145 val spassy = (local_name = pirateN orelse local_name = spassN) 146 147 val completish = Config.get ctxt atp_completish 148 val atp_mode = if completish > 0 then Sledgehammer_Completish completish else Sledgehammer 149 val (_, hyp_ts, concl_t) = strip_subgoal goal subgoal ctxt 150 val (dest_dir, problem_prefix) = 151 if overlord then overlord_file_location_of_prover name 152 else (Config.get ctxt atp_dest_dir, Config.get ctxt atp_problem_prefix) 153 val problem_file_name = 154 Path.basic (problem_prefix ^ (if overlord then "" else serial_string ()) ^ 155 suffix_of_mode mode ^ "_" ^ string_of_int subgoal) 156 val prob_path = 157 if dest_dir = "" then 158 File.tmp_path problem_file_name 159 else if File.exists (Path.explode dest_dir) then 160 Path.append (Path.explode dest_dir) problem_file_name 161 else 162 error ("No such directory: " ^ quote dest_dir) 163 val exec = exec full_proofs 164 val command0 = 165 (case find_first (fn var => getenv var <> "") (fst exec) of 166 SOME var => 167 let 168 val pref = getenv var ^ "/" 169 val paths = 170 map (Path.explode o prefix pref) 171 (if ML_System.platform_is_windows then 172 map (suffix ".exe") (snd exec) @ snd exec 173 else snd exec); 174 in 175 (case find_first File.exists paths of 176 SOME path => path 177 | NONE => error ("Bad executable: " ^ Path.print (hd paths))) 178 end 179 | NONE => error ("The environment variable " ^ quote (List.last (fst exec)) ^ " is not set")) 180 181 fun split_time s = 182 let 183 val split = String.tokens (fn c => str c = "\n") 184 val (output, t) = s |> split |> (try split_last #> the_default ([], "0")) |>> cat_lines 185 val num = Scan.many1 Symbol.is_ascii_digit >> (fst o read_int) 186 val digit = Scan.one Symbol.is_ascii_digit 187 val num3 = digit ::: digit ::: (digit >> single) >> (fst o read_int) 188 val time = num --| Scan.$$ "." -- num3 >> (fn (a, b) => a * 1000 + b) 189 val as_time = raw_explode #> Scan.read Symbol.stopper time #> the_default 0 190 in (output, as_time t |> Time.fromMilliseconds) end 191 192 fun run () = 193 let 194 (* If slicing is disabled, we expand the last slice to fill the entire time available. *) 195 val all_slices = best_slices ctxt 196 val actual_slices = get_slices slice all_slices 197 198 fun max_facts_of_slices (slices : (real * (slice_spec * string)) list) = 199 fold (Integer.max o fst o #1 o fst o snd) slices 0 200 201 val num_actual_slices = length actual_slices 202 val max_fact_factor = 203 Real.fromInt (case max_facts of NONE => max_facts_of_slices all_slices | SOME max => max) 204 / Real.fromInt (max_facts_of_slices (map snd actual_slices)) 205 206 fun monomorphize_facts facts = 207 let 208 val ctxt = 209 ctxt 210 |> repair_monomorph_context max_mono_iters best_max_mono_iters max_new_mono_instances 211 best_max_new_mono_instances 212 (* pseudo-theorem involving the same constants as the subgoal *) 213 val subgoal_th = 214 Logic.list_implies (hyp_ts, concl_t) |> Skip_Proof.make_thm thy 215 val rths = 216 facts |> chop mono_max_privileged_facts 217 |>> map (pair 1 o snd) 218 ||> map (pair 2 o snd) 219 |> op @ 220 |> cons (0, subgoal_th) 221 in 222 Monomorph.monomorph atp_schematic_consts_of ctxt rths 223 |> tl |> curry ListPair.zip (map fst facts) 224 |> maps (fn (name, rths) => map (pair name o zero_var_indexes o snd) rths) 225 end 226 227 fun run_slice time_left (cache_key, cache_value) (slice, (time_frac, 228 (key as ((best_max_facts, best_fact_filter), format, best_type_enc, best_lam_trans, 229 best_uncurried_aliases), 230 extra))) = 231 let 232 val effective_fact_filter = fact_filter |> the_default best_fact_filter 233 val facts = get_facts_of_filter effective_fact_filter factss 234 val num_facts = 235 Real.ceil (max_fact_factor * Real.fromInt best_max_facts) + max_fact_factor_fudge 236 |> Integer.min (length facts) 237 val generate_info = (case format of DFG _ => true | _ => false) 238 val strictness = if strict then Strict else Non_Strict 239 val type_enc = type_enc |> choose_type_enc strictness best_type_enc format 240 val sound = is_type_enc_sound type_enc 241 val real_ms = Real.fromInt o Time.toMilliseconds 242 val slice_timeout = 243 (real_ms time_left 244 |> (if slice < num_actual_slices - 1 then 245 curry Real.min (time_frac * real_ms timeout) 246 else 247 I)) 248 * 0.001 249 |> seconds 250 val generous_slice_timeout = 251 if mode = MaSh then one_day else slice_timeout + atp_timeout_slack 252 val _ = 253 if debug then 254 quote name ^ " slice #" ^ string_of_int (slice + 1) ^ 255 " with " ^ string_of_int num_facts ^ " fact" ^ 256 plural_s num_facts ^ " for " ^ string_of_time slice_timeout ^ "..." 257 |> writeln 258 else 259 () 260 val readable_names = not (Config.get ctxt atp_full_names) 261 val lam_trans = lam_trans |> the_default best_lam_trans 262 val uncurried_aliases = uncurried_aliases |> the_default best_uncurried_aliases 263 val value as (atp_problem, _, _, _) = 264 if cache_key = SOME key then 265 cache_value 266 else 267 facts 268 |> not sound ? filter_out (is_dangerous_prop ctxt o Thm.prop_of o snd) 269 |> take num_facts 270 |> not (is_type_enc_polymorphic type_enc) ? monomorphize_facts 271 |> map (apsnd Thm.prop_of) 272 |> generate_atp_problem ctxt generate_info format prem_role type_enc atp_mode 273 lam_trans uncurried_aliases readable_names true hyp_ts concl_t 274 275 fun sel_weights () = atp_problem_selection_weights atp_problem 276 fun ord_info () = atp_problem_term_order_info atp_problem 277 278 val ord = effective_term_order ctxt name 279 val args = 280 arguments ctxt full_proofs extra slice_timeout (File.bash_path prob_path) 281 (ord, ord_info, sel_weights) 282 val command = 283 "(exec 2>&1; " ^ File.bash_path command0 ^ " " ^ args ^ " " ^ ")" 284 |> enclose "TIMEFORMAT='%3R'; { time " " ; }" 285 286 val _ = 287 atp_problem 288 |> lines_of_atp_problem format ord ord_info 289 |> cons ("% " ^ command ^ "\n" ^ (if comment = "" then "" else "% " ^ comment ^ "\n")) 290 |> File.write_list prob_path 291 292 val ((output, run_time), (atp_proof, outcome)) = 293 Timeout.apply generous_slice_timeout Isabelle_System.bash_output command 294 |>> (if overlord then prefix ("% " ^ command ^ "\n% " ^ timestamp () ^ "\n") else I) 295 |> fst |> split_time 296 |> (fn accum as (output, _) => 297 (accum, 298 extract_tstplike_proof_and_outcome verbose proof_delims known_failures output 299 |>> atp_proof_of_tstplike_proof (perhaps (try (unprefix remote_prefix)) name) 300 atp_problem 301 handle UNRECOGNIZED_ATP_PROOF () => ([], SOME ProofUnparsable))) 302 handle Timeout.TIMEOUT _ => (("", slice_timeout), ([], SOME TimedOut)) 303 304 val outcome = 305 (case outcome of 306 NONE => 307 (case used_facts_in_unsound_atp_proof ctxt (map fst facts) atp_proof of 308 SOME facts => 309 let 310 val failure = UnsoundProof (is_type_enc_sound type_enc, sort string_ord facts) 311 in 312 if debug then (warning (string_of_atp_failure failure); NONE) else SOME failure 313 end 314 | NONE => (found_proof (); NONE)) 315 | _ => outcome) 316 in 317 ((SOME key, value), (output, run_time, facts, atp_proof, outcome), 318 SOME (format, type_enc)) 319 end 320 321 val timer = Timer.startRealTimer () 322 323 fun maybe_run_slice slice (result as (cache, (_, run_time0, _, _, SOME _), _)) = 324 let val time_left = timeout - Timer.checkRealTimer timer in 325 if time_left <= Time.zeroTime then 326 result 327 else 328 run_slice time_left cache slice 329 |> (fn (cache, (output, run_time, used_from, atp_proof, outcome), 330 format_type_enc) => 331 (cache, (output, run_time0 + run_time, used_from, atp_proof, outcome), 332 format_type_enc)) 333 end 334 | maybe_run_slice _ result = result 335 in 336 ((NONE, ([], Symtab.empty, [], Symtab.empty)), 337 ("", Time.zeroTime, [], [], SOME InternalError), NONE) 338 |> fold maybe_run_slice actual_slices 339 end 340 341 (* If the problem file has not been exported, remove it; otherwise, export 342 the proof file too. *) 343 fun clean_up () = if dest_dir = "" then (try File.rm prob_path; ()) else () 344 fun export (_, (output, _, _, _, _), _) = 345 if dest_dir = "" then () 346 else File.write (Path.explode (Path.implode prob_path ^ "_proof")) output 347 348 val ((_, (_, pool, lifted, sym_tab)), (output, run_time, used_from, atp_proof, outcome), 349 SOME (format, type_enc)) = 350 with_cleanup clean_up run () |> tap export 351 352 val important_message = 353 if mode = Normal andalso Random.random_range 0 (atp_important_message_keep_quotient - 1) = 0 354 then extract_important_message output 355 else "" 356 357 val (used_facts, preferred_methss, message) = 358 (case outcome of 359 NONE => 360 let 361 val used_facts = sort_by fst (used_facts_in_atp_proof ctxt (map fst used_from) atp_proof) 362 val needs_full_types = is_typed_helper_used_in_atp_proof atp_proof 363 val preferred_methss = 364 (Metis_Method (NONE, NONE), 365 bunches_of_proof_methods try0 (smt_proofs <> SOME false) needs_full_types 366 (if atp_proof_prefers_lifting atp_proof then liftingN else hide_lamsN)) 367 in 368 (used_facts, preferred_methss, 369 fn preplay => 370 let 371 val _ = if verbose then writeln "Generating proof text..." else () 372 373 fun isar_params () = 374 let 375 val metis_type_enc = 376 if is_typed_helper_used_in_atp_proof atp_proof then SOME full_typesN else NONE 377 val metis_lam_trans = 378 if atp_proof_prefers_lifting atp_proof then SOME liftingN else NONE 379 val atp_proof = 380 atp_proof 381 |> termify_atp_proof ctxt name format type_enc pool lifted sym_tab 382 |> spassy ? introduce_spassy_skolems 383 |> factify_atp_proof (map fst used_from) hyp_ts concl_t 384 in 385 (verbose, (metis_type_enc, metis_lam_trans), preplay_timeout, compress, try0, 386 minimize, atp_proof, goal) 387 end 388 389 val one_line_params = (preplay (), proof_banner mode name, subgoal, subgoal_count) 390 val num_chained = length (#facts (Proof.goal state)) 391 in 392 proof_text ctxt debug isar_proofs smt_proofs isar_params num_chained 393 one_line_params ^ 394 (if important_message <> "" then 395 "\n\nImportant message from Dr. Geoff Sutcliffe:\n" ^ important_message 396 else 397 "") 398 end) 399 end 400 | SOME failure => 401 ([], (Auto_Method (* dummy *), []), fn _ => string_of_atp_failure failure)) 402 in 403 {outcome = outcome, used_facts = used_facts, used_from = used_from, 404 preferred_methss = preferred_methss, run_time = run_time, message = message} 405 end 406 407end; 408