1theory Computations
2  imports Codegen_Basics.Setup
3    "HOL-Library.Code_Target_Int"
4begin
5
6section \<open>Computations \label{sec:computations}\<close>
7
8subsection \<open>Prelude -- The @{text code} antiquotation \label{sec:code_antiq}\<close>
9
10text \<open>
11  The @{ML_antiquotation_def code} antiquotation allows to include constants
12  from
13  generated code directly into ML system code, as in the following toy
14  example:
15\<close>
16
17datatype %quote form = T | F | And form form | Or form form (*<*)
18
19(*>*) ML %quotetypewriter \<open>
20  fun eval_form @{code T} = true
21    | eval_form @{code F} = false
22    | eval_form (@{code And} (p, q)) =
23        eval_form p andalso eval_form q
24    | eval_form (@{code Or} (p, q)) =
25        eval_form p orelse eval_form q;
26\<close>
27
28text \<open>
29  \noindent The antiquotation @{ML_antiquotation code} takes
30  the name of a constant as argument;
31  the required code is generated
32  transparently and the corresponding constant names are inserted
33  for the given antiquotations.  This technique allows to use pattern
34  matching on constructors stemming from compiled datatypes.
35  Note that the @{ML_antiquotation code}
36  antiquotation may not refer to constants which carry adaptations;
37  here you have to refer to the corresponding adapted code directly.
38\<close>
39
40  
41subsection \<open>The concept of computations\<close>
42
43text \<open>
44  Computations embody the simple idea that for each
45  monomorphic Isabelle/HOL term of type @{text \<tau>} by virtue of
46  code generation there exists an corresponding ML type @{text T} and
47  a morphism @{text "\<Phi> :: \<tau> \<rightarrow> T"} satisfying
48  @{text "\<Phi> (t\<^sub>1 \<cdot> t\<^sub>2) = \<Phi> t\<^sub>1 \<cdot> \<Phi> t\<^sub>2"}, with @{text \<cdot>} denoting
49  term application.
50
51  For a given Isabelle/HOL type @{text \<tau>}, parts of @{text \<Phi>} can be
52  implemented by a corresponding ML function @{text "\<phi>\<^sub>\<tau> :: term \<rightarrow> T"}.
53  How?
54
55    \<^descr> \<open>Let input be a constant C :: \<tau>.\<close> \\
56      Then @{text "\<phi>\<^sub>\<tau> C = f\<^sub>C"} with @{text "f\<^sub>C"} being
57      the image of @{text C} under code generation.
58
59    \<^descr> \<open>Let input be an application (t\<^sub>1 \<cdot> t\<^sub>2) :: \<tau>.\<close> \\
60      Then @{text "\<phi>\<^sub>\<tau> (t\<^sub>1 \<cdot> t\<^sub>2) = \<phi>\<^sub>\<tau> t\<^sub>1 (\<phi>\<^sub>\<tau> t\<^sub>2)"}.
61
62  \noindent Using these trivial properties, each monomorphic constant
63  @{text "C : \<^vec>\<tau>\<^sub>n \<rightarrow> \<tau>"} yields the following
64  equations:
65\<close>
66
67text %quote \<open>
68  @{text "\<phi>\<^bsub>(\<tau>\<^sub>1 \<rightarrow> \<tau>\<^sub>2 \<rightarrow> \<dots> \<rightarrow> \<tau>\<^sub>n \<rightarrow> \<tau>)\<^esub> C = f\<^sub>C"} \\
69  @{text "\<phi>\<^bsub>(\<tau>\<^sub>2 \<rightarrow> \<dots> \<rightarrow> \<tau>\<^sub>n \<rightarrow> \<tau>)\<^esub> (C \<cdot> t\<^sub>1) = f\<^sub>C (\<phi>\<^bsub>\<tau>\<^sub>1\<^esub> t\<^sub>1)"} \\
70  @{text "\<dots>"} \\
71  @{text "\<phi>\<^bsub>\<tau>\<^esub> (C \<cdot> t\<^sub>1 \<cdot> \<dots> \<cdot> t\<^sub>n) = f\<^sub>C (\<phi>\<^bsub>\<tau>\<^sub>1\<^esub> t\<^sub>1) \<dots> (\<phi>\<^bsub>\<tau>\<^sub>n\<^esub> t\<^sub>n)"}
72\<close>
73  
74text \<open>
75  \noindent Hence a computation is characterized as follows:
76
77    \<^item> Let @{text "input constants"} denote a set of monomorphic constants.
78
79    \<^item> Let @{text \<tau>} denote a monomorphic type and @{text "'ml"} be a schematic
80      placeholder for its corresponding type in ML under code generation.
81
82    \<^item> Then the corresponding computation is an ML function of type
83      @{ML_type "Proof.context -> term -> 'ml"}
84      partially implementing the morphism @{text "\<Phi> :: \<tau> \<rightarrow> T"} for all
85      \<^emph>\<open>input terms\<close> consisting only of input constants and applications.
86
87  \noindent The charming idea is that all required code is automatically generated
88  by the code generator for givens input constants and types;
89  that code is directly inserted into the Isabelle/ML runtime system
90  by means of antiquotations.
91\<close>
92
93
94subsection \<open>The @{text computation} antiquotation\<close>
95
96text \<open>
97  The following example illustrates its basic usage:
98\<close>
99
100ML %quotetypewriter \<open>
101  local
102
103  fun int_of_nat @{code "0 :: nat"} = 0
104    | int_of_nat (@{code Suc} n) = int_of_nat n + 1;
105
106  in
107
108  val comp_nat = @{computation nat terms:
109    "plus :: nat \<Rightarrow> nat \<Rightarrow> nat" "times :: nat \<Rightarrow> nat \<Rightarrow> nat"
110    "sum_list :: nat list \<Rightarrow> nat" "prod_list :: nat list \<Rightarrow> nat"
111    datatypes: nat "nat list"}
112    (fn post => post o HOLogic.mk_nat o int_of_nat o the);
113
114  end
115\<close>
116
117text \<open>
118    \<^item> Antiquotations occurring in the
119      same ML block always refer to the same transparently generated code;
120      particularly, they share the same transparently generated datatype
121      declarations.
122
123    \<^item> The type of a computation is specified as first argument.
124
125    \<^item> Input constants are specified the following ways:
126
127        \<^item> Each term following @{text "terms:"} specifies all constants
128          it contains as input constants.
129
130        \<^item> Each type following @{text "datatypes:"} specifies all constructors
131          of the corresponding code datatype as input constants.  Note that
132          this does not increase expressiveness but succinctness for datatypes
133          with many constructors.  Abstract type constructors are skipped
134          silently.
135
136    \<^item> The code generated by a @{text computation} antiquotation takes a functional argument
137      which describes how to conclude the computation.  What's the rationale
138      behind this?
139
140        \<^item> There is no automated way to generate a reconstruction function
141          from the resulting ML type to a Isabelle term -- this is in the
142          responsibility of the implementor.  One possible approach
143          for robust term reconstruction is the @{text code} antiquotation.
144
145        \<^item> Both statically specified input constants and dynamically provided input
146          terms are subject to preprocessing.  Likewise the result
147          is supposed to be subject to postprocessing; the implementor
148          is expected to take care for this explicitly.
149
150        \<^item> Computations follow the partiality principle (cf.~\secref{sec:partiality_principle}):
151          failures due to pattern matching or unspecified functions
152          are interpreted as partiality; therefore resulting ML values
153          are optional. 
154
155      Hence the functional argument accepts the following parameters
156
157        \<^item> A postprocessor function @{ML_type "term -> term"}.
158
159        \<^item> The resulting value as optional argument.
160
161      The functional argument is supposed to compose the final result
162      from these in a suitable manner.
163
164  \noindent A simple application:
165\<close>
166
167ML_val %quotetypewriter \<open>
168  comp_nat @{context} @{term "sum_list [Suc 0, Suc (Suc 0)] * Suc (Suc 0)"}
169\<close>
170
171text \<open>
172  \noindent A single ML block may contain an arbitrary number of computation
173  antiquotations.  These share the \<^emph>\<open>same\<close> set of possible
174  input constants.  In other words, it does not matter in which
175  antiquotation constants are specified;  in the following example,
176  \<^emph>\<open>all\<close> constants are specified by the first antiquotation once
177  and for all:
178\<close>
179
180ML %quotetypewriter \<open>
181  local
182
183  fun int_of_nat @{code "0 :: nat"} = 0
184    | int_of_nat (@{code Suc} n) = int_of_nat n + 1;
185
186  in
187
188  val comp_nat = @{computation nat terms:
189    "plus :: nat \<Rightarrow> nat \<Rightarrow> nat" "times :: nat \<Rightarrow> nat \<Rightarrow> nat"
190    "sum_list :: nat list \<Rightarrow> nat" "prod_list :: nat list \<Rightarrow> nat"
191    "replicate :: nat \<Rightarrow> nat \<Rightarrow> nat list"
192    datatypes: nat "nat list"}
193    (fn post => post o HOLogic.mk_nat o int_of_nat o the);
194
195  val comp_nat_list = @{computation "nat list"}
196    (fn post => post o HOLogic.mk_list @{typ nat} o
197      map (HOLogic.mk_nat o int_of_nat) o the);
198
199  end
200\<close>
201
202  
203subsection \<open>Pitfalls when specifying input constants \label{sec:input_constants_pitfalls}\<close>
204
205text \<open>
206    \<^descr> \<open>Complete type coverage.\<close> Specified input constants must
207      be \<^emph>\<open>complete\<close> in the sense that for each
208      required type @{text \<tau>} there is at least one corresponding
209      input constant which can actually \<^emph>\<open>construct\<close> a concrete
210      value of type @{text \<tau>}, potentially requiring more types recursively;
211      otherwise the system of equations cannot be generated properly.
212      Hence such incomplete input constants sets are rejected immediately.
213
214    \<^descr> \<open>Unsuitful right hand sides.\<close> The generated code for a computation
215      must compile in the strict ML runtime environment.  This imposes
216      the technical restriction that each compiled input constant
217      @{text f\<^sub>C} on the right hand side of a generated equations
218      must compile without throwing an exception.  That rules
219      out pathological examples like @{term [source] "undefined :: nat"}
220      as input constants, as well as abstract constructors (cf. \secref{sec:invariant}).
221
222    \<^descr> \<open>Preprocessing.\<close> For consistency, input constants are subject
223      to preprocessing;  however, the overall approach requires
224      to operate on constants @{text C} and their respective compiled images
225      @{text f\<^sub>C}.\footnote{Technical restrictions of the implementation
226      enforce this, although those could be lifted in the future.}
227      This is a problem whenever preprocessing maps an input constant
228      to a non-constant.
229
230      To circumvent these situations, the computation machinery
231      has a dedicated preprocessor which is applied \<^emph>\<open>before\<close>
232      the regular preprocessing, both to input constants as well as
233      input terms.  This can be used to map problematic constants
234      to other ones that are not subject to regular preprocessing. 
235      Rewrite rules are added using attribute @{attribute code_computation_unfold}.
236      There should rarely be a need to use this beyond the few default
237      setups in HOL, which deal with literals (see also \secref{sec:literal_input}).
238\<close>
239
240  
241subsection \<open>Pitfalls concerning input terms\<close>
242
243text \<open>
244    \<^descr> \<open>No polymorphims.\<close> Input terms must be monomorphic: compilation
245      to ML requires dedicated choice of monomorphic types.
246
247    \<^descr> \<open>No abstractions.\<close> Only constants and applications are admissible
248      as input; abstractions are not possible.  In theory, the
249      compilation schema could be extended to cover abstractions also,
250      but this would increase the trusted code base.  If abstractions
251      are required, they can always be eliminated by a dedicated preprocessing
252      step, e.g.~using combinatorial logic.
253
254    \<^descr> \<open>Potential interfering of the preprocessor.\<close> As described in \secref{sec:input_constants_pitfalls}
255      regular preprocessing can have a disruptive effect for input constants.
256      The same also applies to input terms; however the same measures 
257      to circumvent that problem for input constants apply to input terms also.
258\<close>
259
260  
261subsection \<open>Computations using the @{text computation_conv} antiquotation\<close>
262
263text \<open>
264  Computations are a device to implement fast proof procedures.
265  Then results of a computation are often assumed to be trustable
266  and thus wrapped in oracles (see @{cite "isabelle-isar-ref"}),
267  as in the following example:\footnote{
268  The technical details how numerals are dealt with are given later in
269  \secref{sec:literal_input}.}
270\<close>
271
272ML %quotetypewriter \<open>
273  local
274
275  fun raw_dvd (b, ct) = Thm.mk_binop @{cterm "Pure.eq :: bool \<Rightarrow> bool \<Rightarrow> prop"}
276    ct (if b then @{cterm True} else @{cterm False});
277
278  val (_, dvd_oracle) = Context.>>> (Context.map_theory_result
279    (Thm.add_oracle (@{binding dvd}, raw_dvd)));
280
281  in
282
283  val conv_dvd = @{computation_conv bool terms:
284    "Rings.dvd :: int \<Rightarrow> int \<Rightarrow> bool"
285    "plus :: int \<Rightarrow> int \<Rightarrow> int"
286    "minus :: int \<Rightarrow> int \<Rightarrow> int"
287    "times :: int \<Rightarrow> int \<Rightarrow> int"
288    "0 :: int" "1 :: int" "2 :: int" "3 :: int" "-1 :: int"
289  } (K (curry dvd_oracle))
290
291  end
292\<close>
293
294text \<open>
295    \<^item> Antiquotation @{ML_antiquotation computation_conv} basically yields
296      a conversion of type @{ML_type "Proof.context -> cterm -> thm"}
297      (see further @{cite "isabelle-implementation"}).
298
299    \<^item> The antiquotation expects one functional argument to bridge the
300      \qt{untrusted gap};  this can be done e.g.~using an oracle.  Since that oracle
301      will only yield \qt{valid} results in the context of that particular
302      computation, the implementor must make sure that it does not leave
303      the local ML scope;  in this example, this is achieved using
304      an explicit @{text local} ML block.  The very presence of the oracle
305      in the code acknowledges that each computation requires explicit thinking
306      before it can be considered trustworthy!
307
308    \<^item> Postprocessing just operates as further conversion after normalization.
309
310    \<^item> Partiality makes the whole conversion fall back to reflexivity.
311\<close> (*<*)
312
313(*>*) ML_val %quotetypewriter \<open>
314  conv_dvd @{context} @{cterm "7 dvd ( 62437867527846782 :: int)"};
315  conv_dvd @{context} @{cterm "7 dvd (-62437867527846783 :: int)"};
316\<close>
317
318text \<open>
319  \noindent An oracle is not the only way to construct a valid theorem.
320  A computation result can be used to construct an appropriate certificate:
321\<close>
322
323lemma %quote conv_div_cert:
324  "(Code_Target_Int.positive r * Code_Target_Int.positive s)
325     div Code_Target_Int.positive s \<equiv> (numeral r :: int)" (is "?lhs \<equiv> ?rhs")
326proof (rule eq_reflection)
327  have "?lhs = numeral r * numeral s div numeral s"
328    by simp
329  also have "numeral r * numeral s div numeral s = ?rhs"
330    by (rule nonzero_mult_div_cancel_right) simp
331  finally show "?lhs = ?rhs" .
332qed
333
334lemma %quote positive_mult:
335  "Code_Target_Int.positive r * Code_Target_Int.positive s = 
336    Code_Target_Int.positive (r * s)"
337  by simp
338  
339ML %quotetypewriter \<open>
340  local
341
342  fun integer_of_int (@{code int_of_integer} k) = k
343
344  val cterm_of_int = Thm.cterm_of @{context} o HOLogic.mk_numeral o integer_of_int;
345
346  val divisor = Thm.dest_arg o Thm.dest_arg;
347
348  val evaluate_simps = map mk_eq @{thms arith_simps positive_mult};
349
350  fun evaluate ctxt = 
351    Simplifier.rewrite_rule ctxt evaluate_simps;
352
353  fun revaluate ctxt k ct =
354    @{thm conv_div_cert}
355    |> Thm.instantiate' [] [SOME (cterm_of_int k), SOME (divisor ct)]
356    |> evaluate ctxt;
357
358  in
359
360  val conv_div = @{computation_conv int terms:
361    "divide :: int \<Rightarrow> int \<Rightarrow> int"
362    "0 :: int" "1 :: int" "2 :: int" "3 :: int"
363  } revaluate
364
365  end
366\<close>
367
368ML_val %quotetypewriter \<open>
369  conv_div @{context}
370    @{cterm "46782454343499999992777742432342242323423425 div (7 :: int)"}
371\<close>
372
373text \<open>
374  \noindent The example is intentionally kept simple: only positive
375  integers are covered, only remainder-free divisions are feasible,
376  and the input term is expected to have a particular shape.
377  This exhibits the idea more clearly:
378  the result of the computation is used as a mere
379  hint how to instantiate @{fact conv_div_cert}, from which the required
380  theorem is obtained by performing multiplication using the
381  simplifier;  hence that theorem is built of proper inferences,
382  with no oracles involved.
383\<close>
384
385
386subsection \<open>Computations using the @{text computation_check} antiquotation\<close>
387
388text \<open>
389  The @{text computation_check} antiquotation is convenient if
390  only a positive checking of propositions is desired, because then
391  the result type is fixed (@{typ prop}) and all the technical
392  matter concerning postprocessing and oracles is done in the framework
393  once and for all:
394\<close>
395
396ML %quotetypewriter \<open>
397  val check_nat = @{computation_check terms:
398    Trueprop "less :: nat \<Rightarrow> nat \<Rightarrow> bool" "plus :: nat \<Rightarrow> nat \<Rightarrow> nat" 
399    "times :: nat \<Rightarrow> nat \<Rightarrow> nat"
400    "0 :: nat" Suc
401  }
402\<close>
403
404text \<open>
405  \noindent The HOL judgement @{term Trueprop} embeds an expression
406  of type @{typ bool} into @{typ prop}.
407\<close>
408
409ML_val %quotetypewriter \<open>
410  check_nat @{context} @{cprop "less (Suc (Suc 0)) (Suc (Suc (Suc 0)))"}
411\<close>
412
413text \<open>
414  \noindent Note that such computations can only \<^emph>\<open>check\<close>
415  for @{typ prop}s to hold but not \<^emph>\<open>decide\<close>.
416\<close>
417
418
419subsection \<open>Some practical hints\<close>  
420
421subsubsection \<open>Inspecting generated code\<close>
422
423text \<open>
424  The antiquotations for computations attempt to produce meaningful error
425  messages.  If these are not sufficient, it might by useful to
426  inspect the generated code, which can be achieved using
427\<close>
428  
429declare %quote [[ML_source_trace]] (*<*) declare %quote [[ML_source_trace = false]] (*>*)
430
431
432subsubsection \<open>Literals as input constants \label{sec:literal_input}\<close>
433
434text \<open>
435  Literals (characters, numerals) in computations cannot be processed
436  naively: the compilation pattern for computations fails whenever
437  target-language literals are involved; since various
438  common code generator setups (see \secref{sec:common_adaptation})
439  implement @{typ nat} and @{typ int} by target-language literals,
440  this problem manifests whenever numeric types are involved.
441  In practice, this is circumvented with a dedicated preprocessor
442  setup for literals (see also \secref{sec:input_constants_pitfalls}).
443
444  The following examples illustrate the pattern
445  how to specify input constants when literals are involved, without going into
446  too much detail:
447\<close>
448
449paragraph \<open>An example for @{typ nat}\<close>
450
451ML %quotetypewriter \<open>
452  val check_nat = @{computation_check terms:
453    Trueprop "even :: nat \<Rightarrow> bool" "plus :: nat \<Rightarrow> nat \<Rightarrow> nat" 
454    "0 :: nat" Suc "1 :: nat" "2 :: nat" "3 :: nat"
455  }
456\<close>
457
458ML_val %quotetypewriter \<open>
459  check_nat @{context} @{cprop "even (Suc 0 + 1 + 2 + 3 + 4 + 5)"}
460\<close>
461  
462paragraph \<open>An example for @{typ int}\<close>
463
464ML %quotetypewriter \<open>
465  val check_int = @{computation_check terms:
466    Trueprop "even :: int \<Rightarrow> bool" "plus :: int \<Rightarrow> int \<Rightarrow> int" 
467    "0 :: int" "1 :: int" "2 :: int" "3 :: int" "-1 :: int"
468  }
469\<close>
470
471ML_val %quotetypewriter \<open>
472  check_int @{context} @{cprop "even ((0::int) + 1 + 2 + 3 + -1 + -2 + -3)"}
473\<close>
474  
475paragraph \<open>An example for @{typ String.literal}\<close>
476
477definition %quote is_cap_letter :: "String.literal \<Rightarrow> bool"
478  where "is_cap_letter s \<longleftrightarrow> (case String.asciis_of_literal s
479    of [] \<Rightarrow> False | k # _ \<Rightarrow> 65 \<le> k \<and> k \<le> 90)" (*<*)
480
481(*>*) ML %quotetypewriter \<open>
482  val check_literal = @{computation_check terms:
483    Trueprop is_cap_letter datatypes: bool String.literal
484  }
485\<close>
486
487ML_val %quotetypewriter \<open>
488  check_literal @{context} @{cprop "is_cap_letter (STR ''Q'')"}
489\<close>
490
491  
492subsubsection \<open>Preprocessing HOL terms into evaluable shape\<close>
493
494text \<open>
495  When integrating decision procedures developed inside HOL into HOL itself,
496  a common approach is to use a deep embedding where operators etc.
497  are represented by datatypes in Isabelle/HOL.  Then it is necessary
498  to turn generic shallowly embedded statements into that particular
499  deep embedding (\qt{reification}).
500
501  One option is to hardcode using code antiquotations (see \secref{sec:code_antiq}).
502  Another option is to use pre-existing infrastructure in HOL:
503  @{ML "Reification.conv"} and @{ML "Reification.tac"}.
504
505  A simplistic example:
506\<close>
507
508datatype %quote form_ord = T | F | Less nat nat
509  | And form_ord form_ord | Or form_ord form_ord | Neg form_ord
510
511primrec %quote interp :: "form_ord \<Rightarrow> 'a::order list \<Rightarrow> bool"
512where
513  "interp T vs \<longleftrightarrow> True"
514| "interp F vs \<longleftrightarrow> False"
515| "interp (Less i j) vs \<longleftrightarrow> vs ! i < vs ! j"
516| "interp (And f1 f2) vs \<longleftrightarrow> interp f1 vs \<and> interp f2 vs"
517| "interp (Or f1 f2) vs \<longleftrightarrow> interp f1 vs \<or> interp f2 vs"
518| "interp (Neg f) vs \<longleftrightarrow> \<not> interp f vs"
519
520text \<open>
521  \noindent The datatype @{type form_ord} represents formulae whose semantics is given by
522  @{const interp}.  Note that values are represented by variable indices (@{typ nat})
523  whose concrete values are given in list @{term vs}.
524\<close>
525
526ML %quotetypewriter (*<*) \<open>\<close>
527lemma "thm": fixes x y z :: "'a::order" shows "x < y \<and> x < z \<equiv> interp (And (Less (Suc 0) (Suc (Suc 0))) (Less (Suc 0) 0)) [z, x, y]"
528ML_prf %quotetypewriter
529(*>*) \<open>val thm =
530  Reification.conv @{context} @{thms interp.simps} @{cterm "x < y \<and> x < z"}\<close> (*<*)
531by (tactic \<open>ALLGOALS (resolve_tac @{context} [thm])\<close>)
532(*>*) 
533
534text \<open>
535  \noindent By virtue of @{fact interp.simps}, @{ML "Reification.conv"} provides a conversion
536  which, for this concrete example, yields @{thm thm [no_vars]}.  Note that the argument
537  to @{const interp} does not contain any free variables and can thus be evaluated
538  using evaluation.
539
540  A less meager example can be found in the AFP, session @{text "Regular-Sets"},
541  theory @{text Regexp_Method}.
542\<close>
543
544end
545