1(* ========================================================================= *)
2(* Create "informationTheory" setting up the theory of information           *)
3(* ========================================================================= *)
4
5(* ------------------------------------------------------------------------- *)
6(* Load and open relevant theories                                           *)
7(* (Comment out "load" and "loadPath"s for compilation)                      *)
8(* ------------------------------------------------------------------------- *)
9(*
10
11app load ["bossLib", "metisLib", "arithmeticTheory", "pred_setTheory", "listTheory",
12          "state_transformerTheory", "probabilityTheory", "formalizeUseful",
13          "combinTheory", "pairTheory", "realTheory", "realLib", "extra_boolTheory", "jrhUtils",
14          "extra_pred_setTheory", "realSimps", "extra_realTheory",
15          "measureTheory", "numTheory", "simpLib",
16          "seqTheory", "subtypeTheory",
17          "transcTheory", "limTheory", "stringTheory", "rich_listTheory", "stringSimps",
18          "listSimps", "lebesgueTheory", "borelTheory"];
19
20*)
21
22open HolKernel Parse boolLib bossLib metisLib arithmeticTheory pred_setTheory
23     listTheory state_transformerTheory
24     probabilityTheory formalizeUseful extra_numTheory combinTheory
25     pairTheory realTheory realLib extra_boolTheory jrhUtils
26     extra_pred_setTheory realSimps extra_realTheory measureTheory numTheory
27     simpLib seqTheory subtypeTheory
28     transcTheory limTheory stringTheory rich_listTheory stringSimps listSimps
29     lebesgueTheory borelTheory;
30
31open real_sigmaTheory;
32
33(* ------------------------------------------------------------------------- *)
34(* Start a new theory called "information"                                   *)
35(* ------------------------------------------------------------------------- *)
36
37val _ = new_theory "information";
38
39(* ------------------------------------------------------------------------- *)
40(* Helpful proof tools                                                       *)
41(* ------------------------------------------------------------------------- *)
42
43val REVERSE = Tactical.REVERSE;
44val Simplify = RW_TAC arith_ss;
45val Suff = PARSE_TAC SUFF_TAC;
46val Know = PARSE_TAC KNOW_TAC;
47val Rewr = DISCH_THEN (REWRITE_TAC o wrap);
48val Rewr' = DISCH_THEN (ONCE_REWRITE_TAC o wrap);
49val Cond =
50  DISCH_THEN
51  (fn mp_th =>
52   let
53     val cond = fst (dest_imp (concl mp_th))
54   in
55     KNOW_TAC cond >| [ALL_TAC, DISCH_THEN (MP_TAC o MP mp_th)]
56   end);
57
58val POP_ORW = POP_ASSUM (fn thm => ONCE_REWRITE_TAC [thm]);
59
60val safe_list_ss = bool_ss ++ LIST_ss;
61val safe_string_ss = bool_ss ++ STRING_ss;
62val arith_string_ss = arith_ss ++ STRING_ss;
63
64(* ************************************************************************* *)
65(* ************************************************************************* *)
66(* Basic Definitions                                                         *)
67(* ************************************************************************* *)
68(* ************************************************************************* *)
69
70val KL_divergence_def = Define
71   `KL_divergence b s u v =
72        integral (space s, subsets s, u) (\x. logr b ((RN_deriv (space s, subsets s, v) u) x))`;
73
74
75val mutual_information_def = Define
76   `mutual_information b p s1 s2 X Y  =
77        let prod_space =
78                prod_measure_space (space s1, subsets s1, distribution p X)
79                                   (space s2, subsets s2, distribution p Y) in
80        KL_divergence b
81                (p_space prod_space, events prod_space)
82                (joint_distribution p X Y)
83                (prob prod_space)`;
84
85val entropy_def = Define
86   `entropy b p s X = mutual_information b p s s X X`;
87
88
89val conditional_mutual_information_def = Define
90   `conditional_mutual_information b p s1 s2 s3 X Y Z =
91        let prod_space =
92            prod_measure_space (space s2, subsets s2, distribution p Y)
93                               (space s3, subsets s3, distribution p Z) in
94        mutual_information b p s1 (p_space prod_space, events prod_space) X (\x. (Y x, Z x)) -
95        mutual_information b p s1 s3 X Z`;
96
97
98(* ************************************************************************* *)
99(* ************************************************************************* *)
100(* Proofs                                                                    *)
101(* ************************************************************************* *)
102(* ************************************************************************* *)
103
104(* ************************************************************************* *)
105(* Reductions                                                                *)
106(* ************************************************************************* *)
107
108
109val finite_mutual_information_reduce = store_thm
110  ("finite_mutual_information_reduce",
111   ``!b p s1 s2 X Y. (POW (p_space p) = events p) /\
112             random_variable X p (IMAGE X (p_space p), POW (IMAGE X (p_space p))) /\
113             random_variable Y p (IMAGE Y (p_space p), POW (IMAGE Y (p_space p))) /\
114                    FINITE (p_space p) ==>
115        (mutual_information b p (IMAGE X (p_space p), POW (IMAGE X (p_space p)))
116                                (IMAGE Y (p_space p), POW (IMAGE Y (p_space p))) X Y =
117         SIGMA (\(x,y). (joint_distribution p X Y {(x,y)}) *
118                         logr b ((joint_distribution p X Y {(x,y)})/
119                                 ((distribution p X {x})*(distribution p Y {y}))))
120               ((IMAGE X (p_space p)) CROSS (IMAGE Y (p_space p))))``,
121   RW_TAC std_ss [mutual_information_def, KL_divergence_def, space_def, subsets_def]
122   >> `FINITE (IMAGE X (p_space p)) /\ FINITE (IMAGE Y (p_space p))`
123        by RW_TAC std_ss [IMAGE_FINITE]
124   >> Q.ABBREV_TAC `s1 = IMAGE X (p_space p)`
125   >> Q.ABBREV_TAC `s2 = IMAGE Y (p_space p)`
126   >> Know `prod_space = (s1 CROSS s2, POW (s1 CROSS s2), prod_measure (s1, POW s1, distribution p X)
127                                                                  (s2, POW s2, distribution p Y))`
128   >- ( Q.UNABBREV_TAC `prod_space`
129        >> RW_TAC std_ss [prod_measure_space_def, m_space_def, subsets_def, EXTENSION, subsets_def,
130                          sigma_def, prod_sets_def, IN_POW, IN_BIGINTER, measurable_sets_def, SUBSET_DEF,
131                          IN_CROSS, GSPECIFICATION]
132        >> RW_TAC std_ss [GSYM EXTENSION]
133        >> EQ_TAC
134        >- (RW_TAC std_ss [] >> Q.PAT_X_ASSUM `!s. P ==> x IN s` (MP_TAC o Q.SPEC `POW (s1 CROSS s2)`)
135            >> RW_TAC std_ss [IN_POW, SUBSET_DEF, IN_CROSS, POW_SIGMA_ALGEBRA]
136            >> Suff `(!x''. (?x'. (x'',T) = (\(s,t). (s CROSS t,
137                        (!x. x IN s ==> x IN s1) /\ !x. x IN t ==> x IN s2))
138                        x') ==> !x. x IN x'' ==> FST x IN s1 /\ SND x IN s2)` >- METIS_TAC []
139            >> RW_TAC std_ss [] >> (MP_TAC o Q.ISPEC `(x''' :('d -> bool) # ('e -> bool))`) pair_CASES
140            >> STRIP_TAC >> FULL_SIMP_TAC std_ss [] >> METIS_TAC [IN_CROSS] )
141        >> RW_TAC std_ss []
142        >> `x = BIGUNION (IMAGE (\a. {a}) x)`
143                by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_BIGUNION_IMAGE, IN_SING])
144        >> POP_ORW
145        >> FULL_SIMP_TAC std_ss [SIGMA_ALGEBRA, subsets_def]
146        >> POP_ASSUM MATCH_MP_TAC
147        >> CONJ_TAC
148        >- (MATCH_MP_TAC FINITE_COUNTABLE >> MATCH_MP_TAC IMAGE_FINITE
149                >> (MP_TAC o Q.ISPEC `(s1 :'d -> bool) CROSS (s2 :'e -> bool)`) SUBSET_FINITE
150                >> RW_TAC std_ss [FINITE_CROSS]
151                >> POP_ASSUM MATCH_MP_TAC
152                >> METIS_TAC [SUBSET_DEF, IN_CROSS])
153        >> RW_TAC std_ss [SUBSET_DEF, IN_IMAGE, IN_SING]
154        >> Q.PAT_X_ASSUM `!x. Q ==> x IN P` MATCH_MP_TAC
155        >> Q.EXISTS_TAC `({FST a}, {SND a})` >> RW_TAC std_ss [PAIR_EQ, IN_SING]
156        >> ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_CROSS, IN_SING]
157        >> METIS_TAC [PAIR_EQ, PAIR, FST, SND] )
158   >> DISCH_TAC
159   >> POP_ORW
160   >> RW_TAC std_ss [PSPACE, EVENTS]
161   >> (MP_TAC o Q.SPECL [`p`, `s1`, `s2`, `X`, `Y`] o
162        INST_TYPE [``:'c`` |-> ``:'e``, ``:'b``|->``:'d``]) finite_marginal_product_space_POW2
163   >> ASM_SIMP_TAC std_ss []
164   >> STRIP_TAC
165   >> (MP_TAC o Q.SPEC `(s1 CROSS s2,POW (s1 CROSS s2),joint_distribution p X Y)`
166        o INST_TYPE [``:'a`` |-> ``:'d # 'e``]) finite_space_POW_integral_reduce
167   >> ASM_SIMP_TAC std_ss [m_space_def, measurable_sets_def, FINITE_CROSS]
168   >> STRIP_TAC >> POP_ASSUM (K ALL_TAC)
169   >> `FINITE (s1 CROSS s2)`
170        by RW_TAC std_ss [FINITE_CROSS]
171   >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `(s1 :'d -> bool)
172                        CROSS (s2 :'e -> bool)`) REAL_SUM_IMAGE_IN_IF]
173   >> Suff `(\x.
174     (if x IN s1 CROSS s2 then
175        (\x.
176           logr b
177             (RN_deriv
178                (s1 CROSS s2,POW (s1 CROSS s2),
179                 prob
180                   (s1 CROSS s2,POW (s1 CROSS s2),
181                    prod_measure (s1,POW s1,distribution p X)
182                      (s2,POW s2,distribution p Y)))
183                (joint_distribution p X Y) x) *
184           measure
185             (s1 CROSS s2,POW (s1 CROSS s2),joint_distribution p X Y)
186             {x}) x
187      else
188        0)) =
189        (\x.
190     (if x IN s1 CROSS s2 then
191        (\(x,y).
192           joint_distribution p X Y {(x,y)} *
193           logr b
194             (joint_distribution p X Y {(x,y)} /
195              (distribution p X {x} * distribution p Y {y}))) x
196      else
197        0))`
198   >- RW_TAC std_ss []
199   >> RW_TAC std_ss [FUN_EQ_THM]
200   >> RW_TAC std_ss [PROB, measure_def]
201   >> `x = (FST x, SND x)` by RW_TAC std_ss [PAIR]
202   >> POP_ORW
203   >> RW_TAC std_ss []
204   >> Cases_on `joint_distribution p X Y {x} = 0`
205   >- RW_TAC real_ss []
206   >> RW_TAC std_ss [REAL_MUL_COMM, REAL_EQ_LMUL]
207   >> Suff `(RN_deriv
208     (s1 CROSS s2,POW (s1 CROSS s2),
209      prod_measure (s1,POW s1,distribution p X)
210        (s2,POW s2,distribution p Y)) (joint_distribution p X Y) x) =
211        (joint_distribution p X Y {x} /
212        (distribution p X {FST x} * distribution p Y {SND x}))`
213   >- RW_TAC std_ss []
214   >> RW_TAC std_ss [RN_deriv_def]
215   >> Suff `(\f. f x = joint_distribution p X Y {x} /
216        (distribution p X {FST x} * distribution p Y {SND x}))
217        (@f.
218   measure_space
219     (s1 CROSS s2,POW (s1 CROSS s2),
220      prod_measure (s1,POW s1,distribution p X)
221        (s2,POW s2,distribution p Y)) /\
222   measure_space
223     (m_space
224        (s1 CROSS s2,POW (s1 CROSS s2),
225         prod_measure (s1,POW s1,distribution p X)
226           (s2,POW s2,distribution p Y)),
227      measurable_sets
228        (s1 CROSS s2,POW (s1 CROSS s2),
229         prod_measure (s1,POW s1,distribution p X)
230           (s2,POW s2,distribution p Y)),joint_distribution p X Y) /\
231   f IN
232   borel_measurable
233     (m_space
234        (s1 CROSS s2,POW (s1 CROSS s2),
235         prod_measure (s1,POW s1,distribution p X)
236           (s2,POW s2,distribution p Y)),
237      measurable_sets
238        (s1 CROSS s2,POW (s1 CROSS s2),
239         prod_measure (s1,POW s1,distribution p X)
240           (s2,POW s2,distribution p Y))) /\
241   !a.
242     a IN
243     measurable_sets
244       (s1 CROSS s2,POW (s1 CROSS s2),
245        prod_measure (s1,POW s1,distribution p X)
246          (s2,POW s2,distribution p Y)) ==>
247     (integral
248        (s1 CROSS s2,POW (s1 CROSS s2),
249         prod_measure (s1,POW s1,distribution p X)
250           (s2,POW s2,distribution p Y)) (\x. f x * indicator_fn a x) =
251      joint_distribution p X Y a))`
252   >- RW_TAC std_ss []
253   >> MATCH_MP_TAC SELECT_ELIM_THM
254   >> `measure_space (s1 CROSS s2, POW (s1 CROSS s2),
255                   prod_measure
256                     (s1,POW s1,distribution p X)
257                     (s2,POW s2,distribution p Y))`
258        by METIS_TAC [measurable_sets_def, distribution_prob_space, space_def, subsets_def, m_space_def,
259                      prob_space_def, measure_space_finite_prod_measure_POW2]
260   >> RW_TAC std_ss [measurable_sets_def, m_space_def]
261   >- (Q.EXISTS_TAC `\x'. joint_distribution p X Y {x'} /
262                         ((distribution p X {FST x'})*(distribution p Y {SND x'}))`
263       >> STRONG_CONJ_TAC
264       >- (`(s1 CROSS s2,POW (s1 CROSS s2)) =
265            (m_space (s1 CROSS s2,POW (s1 CROSS s2),
266                prod_measure (s1,POW s1,distribution p X)
267                (s2,POW s2,distribution p Y)),
268             measurable_sets (s1 CROSS s2,POW (s1 CROSS s2),
269                prod_measure (s1,POW s1,distribution p X)
270                (s2,POW s2,distribution p Y)))` by RW_TAC std_ss [measurable_sets_def, m_space_def]
271            >> POP_ORW
272            >> ASM_SIMP_TAC std_ss [borel_measurable_le_iff, IN_POW, SUBSET_DEF, GSPECIFICATION,
273                                    measurable_sets_def, m_space_def])
274       >> RW_TAC std_ss [IN_POW]
275       >> (MP_TAC o Q.SPEC `(s1 CROSS s2,POW (s1 CROSS s2),prod_measure (s1,POW s1,distribution p X)
276                        (s2,POW s2,distribution p Y))`
277        o INST_TYPE [``:'a`` |-> ``:'d # 'e``]) finite_space_POW_integral_reduce
278       >> ASM_SIMP_TAC std_ss [m_space_def, measurable_sets_def, FINITE_CROSS, measure_def]
279       >> STRIP_TAC >> POP_ASSUM (K ALL_TAC)
280       >> `(s1 CROSS s2) = a UNION ((s1 CROSS s2) DIFF a)`
281                by (ONCE_REWRITE_TAC [EXTENSION]
282                    >> RW_TAC std_ss [IN_UNION, IN_DIFF] >> METIS_TAC [SUBSET_DEF])
283       >> POP_ORW
284       >> `DISJOINT a ((s1 CROSS s2) DIFF a)`
285                by (RW_TAC std_ss [IN_DISJOINT, IN_DIFF] >> DECIDE_TAC)
286       >> `FINITE a` by METIS_TAC [SUBSET_FINITE, FINITE_CROSS, IMAGE_FINITE]
287       >> RW_TAC std_ss [FINITE_DIFF, REAL_SUM_IMAGE_DISJOINT_UNION]
288       >> `FINITE ((s1 CROSS s2) DIFF a)`
289                by RW_TAC std_ss [FINITE_DIFF]
290       >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `((s1 :'d -> bool) CROSS (s2 :'e -> bool)) DIFF a`)
291                                REAL_SUM_IMAGE_IN_IF]
292       >> `(\x. (if x IN s1 CROSS s2 DIFF a then
293            (\x'.
294               joint_distribution p X Y {x'} /
295               (distribution p X {FST x'} * distribution p Y {SND x'}) * indicator_fn a x' *
296               prod_measure (s1,POW s1,distribution p X)
297             (s2,POW s2,distribution p Y) {x'}) x else 0)) = (\x. 0)`
298                by (RW_TAC std_ss [FUN_EQ_THM] >> RW_TAC real_ss [IN_DIFF, indicator_fn_def])
299       >> RW_TAC real_ss [REAL_SUM_IMAGE_0]
300       >> POP_ASSUM (K ALL_TAC)
301       >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `(a :'d # 'e -> bool)`) REAL_SUM_IMAGE_IN_IF]
302       >> Know `(\x. (if x IN a then (\x'.
303           joint_distribution p X Y {x'} /
304           (distribution p X {FST x'} * distribution p Y {SND x'}) *
305           indicator_fn a x' *
306           prod_measure (s1,POW s1,distribution p X)
307             (s2,POW s2,distribution p Y) {x'}) x else 0)) =
308            (\x. if x IN a then
309                (\x. joint_distribution p X Y {x}) x else 0)`
310
311       >- ( RW_TAC std_ss [FUN_EQ_THM] >> RW_TAC real_ss [indicator_fn_def]
312            >> `{x'} = {FST x'} CROSS {SND x'}`
313                        by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_SING, IN_CROSS]
314                            >> (MP_TAC o Q.ISPEC `(x'' :'d # 'e)`) pair_CASES
315                            >> (MP_TAC o Q.ISPEC `(x' :'d # 'e)`) pair_CASES
316                            >> RW_TAC std_ss [])
317            >> POP_ORW
318            >> `prod_measure (s1,POW s1,distribution p X) (s2,POW s2,distribution p Y)
319                ({FST x'} CROSS {SND x'}) =
320                measure (s1,POW s1,distribution p X) {FST x'} *
321                measure (s2,POW s2,distribution p Y) {SND x'}`
322                by (`measure_space (s1,POW s1,distribution p X) /\
323                     measure_space (s2,POW s2,distribution p Y)`
324                        by METIS_TAC [distribution_prob_space, prob_space_def, space_def, subsets_def]
325                    >> `{FST x'} IN measurable_sets (s1,POW s1,distribution p X) /\
326                        {SND x'} IN measurable_sets (s2,POW s2,distribution p Y)`
327                        by (RW_TAC std_ss [measurable_sets_def, IN_POW, SUBSET_DEF, IN_SING]
328                            >> FULL_SIMP_TAC std_ss [SUBSET_DEF, IN_CROSS])
329                    >> METIS_TAC [finite_POW_prod_measure_reduce, m_space_def, measurable_sets_def])
330            >> POP_ORW
331            >> RW_TAC std_ss [measure_def]
332            >> Cases_on `(distribution p X {FST x'} * distribution p Y {SND x'}) = 0`
333            >- (Suff `joint_distribution p X Y ({FST x'} CROSS {SND x'}) = 0`
334                >- RW_TAC real_ss []
335                >> `PREIMAGE X {FST x'} INTER p_space p IN events p`
336                  by (Q.UNABBREV_TAC `s1`
337                      >> FULL_SIMP_TAC std_ss [random_variable_def, IN_MEASURABLE, space_def,
338                                               subsets_def, IN_POW]
339                      >> Suff `{FST x'} SUBSET IMAGE X (p_space p)` >- METIS_TAC []
340                      >> FULL_SIMP_TAC std_ss [SUBSET_DEF, IN_SING, IN_CROSS])
341                >> `PREIMAGE Y {SND x'} INTER p_space p IN events p`
342                  by (Q.UNABBREV_TAC `s2`
343                      >> FULL_SIMP_TAC std_ss [random_variable_def, IN_MEASURABLE, space_def,
344                                               subsets_def, IN_POW]
345                      >> Suff `{SND x'} SUBSET IMAGE Y (p_space p)` >- METIS_TAC []
346                      >> FULL_SIMP_TAC std_ss [SUBSET_DEF, IN_SING, IN_CROSS])
347                >> `(PREIMAGE X {FST x'} INTER PREIMAGE Y {SND x'} INTER p_space p) IN events p`
348                  by (`PREIMAGE X {FST x'} INTER PREIMAGE Y {SND x'} INTER p_space p =
349                      (PREIMAGE X {FST x'} INTER p_space p) INTER
350                      (PREIMAGE Y {SND x'} INTER p_space p)`
351                        by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER] >> DECIDE_TAC)
352                      >> POP_ORW
353                      >> FULL_SIMP_TAC std_ss [random_variable_def, prob_space_def, measure_space_def,
354                                               events_def]
355                      >> METIS_TAC [sigma_algebra_def, ALGEBRA_INTER, space_def, subsets_def])
356                >> FULL_SIMP_TAC std_ss [REAL_ENTIRE, distribution_def, joint_distribution_def]
357                >- (`(PREIMAGE (\x. (X x,Y x)) ({FST x'} CROSS {SND x'}) INTER p_space p) =
358                     (PREIMAGE X {FST x'} INTER PREIMAGE Y {SND x'} INTER p_space p)`
359                        by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_CROSS, IN_SING])
360                    >> POP_ORW
361                    >> RW_TAC std_ss [GSYM REAL_LE_ANTISYM]
362                    >- (Suff `(PREIMAGE X {FST x'} INTER PREIMAGE Y {SND x'} INTER p_space p) SUBSET
363                              (PREIMAGE X {FST x'} INTER p_space p)`
364                        >- METIS_TAC [REAL_LE_TRANS, REAL_LE_REFL, PROB_INCREASING, random_variable_def]
365                        >> RW_TAC std_ss [SUBSET_DEF, IN_INTER])
366                    >> METIS_TAC [random_variable_def, PROB_POSITIVE])
367                    >> `(PREIMAGE (\x. (X x,Y x)) ({FST x'} CROSS {SND x'}) INTER p_space p) =
368                        (PREIMAGE X {FST x'} INTER PREIMAGE Y {SND x'} INTER p_space p)`
369                          by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_CROSS, IN_SING])
370                    >> POP_ORW
371                    >> RW_TAC std_ss [GSYM REAL_LE_ANTISYM]
372                    >- (Suff `(PREIMAGE X {FST x'} INTER PREIMAGE Y {SND x'} INTER p_space p) SUBSET
373                              (PREIMAGE Y {SND x'} INTER p_space p)`
374                        >- METIS_TAC [REAL_LE_TRANS, REAL_LE_REFL, PROB_INCREASING, random_variable_def]
375                        >> RW_TAC std_ss [SUBSET_DEF, IN_INTER])
376                    >> METIS_TAC [random_variable_def, PROB_POSITIVE])
377            >> RW_TAC std_ss [real_div, measure_def]
378            >> `joint_distribution p X Y ({FST x'} CROSS {SND x'}) *
379                        inv (distribution p X {FST x'} * distribution p Y {SND x'}) *
380                        (distribution p X {FST x'} * distribution p Y {SND x'}) =
381                        (inv (distribution p X {FST x'} * distribution p Y {SND x'}) *
382                        (distribution p X {FST x'} * distribution p Y {SND x'})) *
383                        joint_distribution p X Y ({FST x'} CROSS {SND x'})`
384                        by REAL_ARITH_TAC
385            >> POP_ORW
386            >> RW_TAC real_ss [REAL_MUL_LINV])
387       >> DISCH_TAC
388       >> POP_ORW
389       >> ASM_SIMP_TAC std_ss [GSYM REAL_SUM_IMAGE_IN_IF]
390       >> Q.PAT_X_ASSUM `a SUBSET s1 CROSS s2` MP_TAC
391       >> CONV_TAC (UNBETA_CONV ``(a :'d # 'e -> bool)``)
392       >> Q.PAT_X_ASSUM `FINITE a` MP_TAC
393       >> Q.SPEC_TAC (`a`,`a`)
394       >> MATCH_MP_TAC FINITE_INDUCT
395       >> FULL_SIMP_TAC std_ss [random_variable_def]
396       >> RW_TAC std_ss [REAL_SUM_IMAGE_THM, PREIMAGE_EMPTY, INTER_EMPTY, PROB_EMPTY, DELETE_NON_ELEMENT,
397                         joint_distribution_def]
398       >> `(PREIMAGE (\x. (X x,Y x)) (e INSERT s) INTER p_space p) =
399           (PREIMAGE (\x. (X x,Y x)) {e} INTER p_space p) UNION
400           (PREIMAGE (\x. (X x,Y x)) s INTER p_space p)`
401                by (ONCE_REWRITE_TAC [EXTENSION]
402                    >> RW_TAC std_ss [IN_UNION, IN_INTER, IN_PREIMAGE, IN_INSERT, NOT_IN_EMPTY]
403                    >> FULL_SIMP_TAC std_ss [GSYM DELETE_NON_ELEMENT, SUBSET_DEF, IN_INSERT, IN_CROSS, IN_IMAGE]
404                    >> METIS_TAC [FST, SND])
405       >> POP_ORW
406       >> `DISJOINT (PREIMAGE (\x. (X x,Y x)) {e} INTER p_space p)
407                    (PREIMAGE (\x. (X x,Y x)) s INTER p_space p)`
408                by (RW_TAC std_ss [IN_DISJOINT, IN_INTER, IN_PREIMAGE, IN_SING]
409                    >> FULL_SIMP_TAC std_ss [GSYM DELETE_NON_ELEMENT]
410                    >> METIS_TAC [])
411       >> `prob p (PREIMAGE (\x. (X x,Y x)) {e} INTER p_space p UNION
412                   PREIMAGE (\x. (X x,Y x)) s INTER p_space p) =
413           prob p (PREIMAGE (\x. (X x,Y x)) {e} INTER p_space p) +
414           prob p (PREIMAGE (\x. (X x,Y x)) s INTER p_space p)`
415                by (MATCH_MP_TAC PROB_ADDITIVE
416                    >> RW_TAC std_ss []
417                    >> METIS_TAC [SUBSET_DEF, IN_INTER, IN_POW])
418       >> POP_ORW
419       >> FULL_SIMP_TAC std_ss [SUBSET_DEF, IN_INSERT])
420   >> POP_ASSUM (MP_TAC o Q.SPEC `{x}`)
421   >> RW_TAC std_ss [IN_POW, SUBSET_DEF, IN_SING, finite_space_POW_integral_reduce,
422                     FINITE_CROSS, IMAGE_FINITE, measurable_sets_def, m_space_def,
423                     measure_def]
424   >> POP_ASSUM MP_TAC
425   >> `(s1 CROSS s2) =
426        x INSERT ((s1 CROSS s2) DELETE x)`
427        by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INSERT, IN_DELETE] >> METIS_TAC [])
428   >> POP_ORW
429   >> ASM_SIMP_TAC std_ss [REAL_SUM_IMAGE_THM, FINITE_CROSS, FINITE_DELETE, DELETE_DELETE]
430   >> `FINITE (s1 CROSS s2 DELETE x)`
431        by RW_TAC std_ss [FINITE_DELETE, FINITE_CROSS]
432   >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `((s1 :'d -> bool) CROSS
433                        (s2 :'e -> bool) DELETE (x :'d # 'e))`) REAL_SUM_IMAGE_IN_IF]
434   >> `(\x''.
435      (if x'' IN s1 CROSS s2 DELETE x then
436         (\x''.
437            x' x'' * indicator_fn {x} x'' *
438            prod_measure (s1,POW s1,distribution p X)
439              (s2,POW s2,distribution p Y) {x''}) x''
440       else
441         0)) = (\x'. 0)`
442        by (RW_TAC std_ss [FUN_EQ_THM] >> RW_TAC real_ss [indicator_fn_def, IN_SING, IN_DELETE])
443   >> POP_ORW
444   >> ASM_SIMP_TAC real_ss [REAL_SUM_IMAGE_0, indicator_fn_def, IN_SING]
445   >> `{(x :'d # 'e)} = {FST x} CROSS {SND x}`
446                by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_SING, IN_CROSS]
447                            >> (MP_TAC o Q.ISPEC `(x'' :'d # 'e)`) pair_CASES
448                            >> (MP_TAC o Q.ISPEC `(x :'d # 'e)`) pair_CASES
449                            >> RW_TAC std_ss [])
450   >> FULL_SIMP_TAC std_ss [] >> POP_ASSUM (K ALL_TAC)
451   >> `prod_measure (s1,POW s1,distribution p X) (s2,POW s2,distribution p Y)
452                        ({FST x} CROSS {SND x}) =
453                        measure (s1,POW s1,distribution p X) {FST x} *
454                        measure (s2,POW s2,distribution p Y) {SND x}`
455                        by (`measure_space (s1,POW s1,distribution p X) /\
456                             measure_space (s2,POW s2,distribution p Y)`
457                                by METIS_TAC [distribution_prob_space, prob_space_def, space_def, subsets_def]
458                            >> `{FST x} IN measurable_sets (s1,POW s1,distribution p X) /\
459                                {SND x} IN measurable_sets (s2,POW s2,distribution p Y)`
460                                by (RW_TAC std_ss [measurable_sets_def, IN_POW, SUBSET_DEF, IN_SING]
461                                    >> FULL_SIMP_TAC std_ss [SUBSET_DEF, IN_CROSS])
462                            >> METIS_TAC [finite_POW_prod_measure_reduce, m_space_def, measurable_sets_def])
463   >> POP_ORW
464   >> `x = (FST x, SND x)` by RW_TAC std_ss [PAIR]
465   >> POP_ORW
466   >> FULL_SIMP_TAC std_ss [measure_def, joint_distribution_def, distribution_def]
467   >> `PREIMAGE (\x. (X x,Y x)) ({FST x} CROSS {SND x}) INTER
468        p_space p =
469        PREIMAGE X {FST x} INTER PREIMAGE Y {SND x} INTER p_space p`
470        by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_SING, IN_CROSS])
471   >> FULL_SIMP_TAC std_ss [] >> POP_ASSUM (K ALL_TAC)
472   >> POP_ORW
473   >> Suff `0 < (prob p (PREIMAGE X {FST x} INTER p_space p) *
474            prob p (PREIMAGE Y {SND x} INTER p_space p))`
475   >- RW_TAC real_ss [REAL_EQ_RDIV_EQ]
476   >> MATCH_MP_TAC REAL_LT_MUL
477   >> SIMP_TAC std_ss [REAL_LT_LE]
478   >> `PREIMAGE X {FST x} INTER p_space p IN events p`
479                by (Q.UNABBREV_TAC `s1`
480                    >> FULL_SIMP_TAC std_ss [random_variable_def, IN_MEASURABLE, space_def, subsets_def,
481                                                          IN_POW]
482                    >> Suff `{FST x} SUBSET IMAGE X (p_space p)` >- METIS_TAC []
483                    >> FULL_SIMP_TAC std_ss [SUBSET_DEF, IN_SING, IN_CROSS])
484   >> `PREIMAGE Y {SND x} INTER p_space p IN events p`
485                by (Q.UNABBREV_TAC `s2`
486                    >> FULL_SIMP_TAC std_ss [random_variable_def, IN_MEASURABLE, space_def, subsets_def,
487                                                          IN_POW]
488                    >> Suff `{SND x} SUBSET IMAGE Y (p_space p)` >- METIS_TAC []
489                    >> FULL_SIMP_TAC std_ss [SUBSET_DEF, IN_SING, IN_CROSS])
490   >> `PREIMAGE X {FST x} INTER PREIMAGE Y {SND x} INTER p_space p IN events p`
491                by (`PREIMAGE X {FST x} INTER PREIMAGE Y {SND x} INTER p_space p =
492                                    (PREIMAGE X {FST x} INTER p_space p) INTER
493                                    (PREIMAGE Y {SND x} INTER p_space p)`
494                                        by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER] >> DECIDE_TAC)
495                    >> POP_ORW
496                    >> FULL_SIMP_TAC std_ss [random_variable_def, prob_space_def, measure_space_def,
497                                             events_def]
498                    >> METIS_TAC [sigma_algebra_def, ALGEBRA_INTER, space_def, subsets_def])
499    >> Suff `~(0 = prob p (PREIMAGE X {FST x} INTER p_space p)) /\
500                     ~(0 = prob p (PREIMAGE Y {SND x} INTER p_space p))`
501    >- METIS_TAC [random_variable_def, PROB_POSITIVE]
502    >> `0 <= prob p
503              (PREIMAGE X {FST x} INTER PREIMAGE Y {SND x} INTER
504               p_space p)`
505                by METIS_TAC [random_variable_def, PROB_POSITIVE]
506    >> (CONJ_TAC >> SPOSE_NOT_THEN STRIP_ASSUME_TAC)
507    >- (Suff `prob p (PREIMAGE (X :'a -> 'd) {FST x} INTER PREIMAGE (Y :'a -> 'e) {SND x} INTER p_space p) <=
508              prob p (PREIMAGE (X :'a -> 'd) {FST x} INTER p_space p)`
509                >- METIS_TAC [REAL_LE_ANTISYM]
510                >> MATCH_MP_TAC PROB_INCREASING
511                >> FULL_SIMP_TAC std_ss [random_variable_def, SUBSET_DEF, IN_INTER])
512    >> Suff `prob p (PREIMAGE X {FST x} INTER PREIMAGE Y {SND x} INTER p_space p) <=
513                      prob p (PREIMAGE Y {SND x} INTER p_space p)`
514    >- METIS_TAC [REAL_LE_ANTISYM]
515    >> MATCH_MP_TAC PROB_INCREASING
516    >> FULL_SIMP_TAC std_ss [random_variable_def, SUBSET_DEF, IN_INTER]);
517
518val finite_entropy_reduce = store_thm
519  ("finite_entropy_reduce",
520   ``!b p X. (POW (p_space p) = events p) /\
521             random_variable X p (IMAGE X (p_space p), POW (IMAGE X (p_space p))) /\
522                    FINITE (p_space p) ==>
523        (entropy b p (IMAGE X (p_space p), POW (IMAGE X (p_space p))) X =
524         ~ SIGMA (\x. distribution p X {x} * logr b (distribution p X {x}))
525               (IMAGE X (p_space p)))``,
526   RW_TAC std_ss [entropy_def, finite_mutual_information_reduce]
527   >> `(IMAGE X (p_space p) CROSS IMAGE X (p_space p)) =
528        (IMAGE (\x. (x,x)) (IMAGE X (p_space p))) UNION
529        ((IMAGE X (p_space p) CROSS IMAGE X (p_space p)) DIFF
530         (IMAGE (\x. (x,x)) (IMAGE X (p_space p))))`
531        by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_CROSS, IN_IMAGE, IN_UNION, IN_DIFF]
532            >> EQ_TAC
533            >- (RW_TAC std_ss [] >> `x = (FST x, SND x)` by RW_TAC std_ss [PAIR]
534                >> POP_ORW >> METIS_TAC [])
535            >> RW_TAC std_ss [FST, SND] >> RW_TAC std_ss [FST, SND]
536            >> METIS_TAC [])
537   >> POP_ORW
538   >> `DISJOINT (IMAGE (\x. (x,x)) (IMAGE X (p_space p)))
539                (IMAGE X (p_space p) CROSS IMAGE X (p_space p) DIFF
540                 IMAGE (\x. (x,x)) (IMAGE X (p_space p)))`
541        by (RW_TAC std_ss [IN_DISJOINT, IN_DIFF] >> DECIDE_TAC)
542   >> ASM_SIMP_TAC std_ss [REAL_SUM_IMAGE_DISJOINT_UNION, FINITE_CROSS, IMAGE_FINITE, FINITE_DIFF]
543   >> `FINITE (IMAGE X (p_space p) CROSS IMAGE X (p_space p) DIFF
544               IMAGE (\x. (x,x)) (IMAGE X (p_space p)))`
545        by RW_TAC std_ss [FINITE_CROSS, IMAGE_FINITE, FINITE_DIFF]
546   >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `(IMAGE (X :'a -> 'b) (p_space p) CROSS IMAGE (X :'a -> 'b) (p_space p) DIFF
547               IMAGE (\x. (x,x)) (IMAGE (X :'a -> 'b) (p_space p)))`) REAL_SUM_IMAGE_IN_IF]
548   >> `(\x.
549         (if
550            x IN
551            IMAGE X (p_space p) CROSS IMAGE X (p_space p) DIFF
552            IMAGE (\x. (x,x)) (IMAGE X (p_space p))
553          then
554            (\(x,y).
555               joint_distribution p X X {(x,y)} *
556               logr b
557                 (joint_distribution p X X {(x,y)} / (distribution p X {x} * distribution p X {y})))
558              x
559          else
560            0)) = (\x. 0)`
561        by (RW_TAC std_ss [FUN_EQ_THM, IN_IMAGE, IN_DIFF] >> RW_TAC std_ss []
562            >> FULL_SIMP_TAC std_ss [IN_CROSS, IN_IMAGE]
563            >> `x = (FST x, SND x)` by RW_TAC std_ss [PAIR]
564            >> POP_ORW
565            >> ASM_SIMP_TAC std_ss [joint_distribution_def]
566            >> `PREIMAGE (\x. (X x,X x)) {x} INTER p_space p =
567                PREIMAGE X {X x'} INTER PREIMAGE X {X x''} INTER p_space p`
568                by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_SING, IN_CROSS]
569                    >> METIS_TAC [PAIR, FST, SND, PAIR_EQ])
570            >> POP_ORW
571            >> Suff `PREIMAGE X {X x'} INTER PREIMAGE X {X x''} INTER p_space p = {}`
572            >- FULL_SIMP_TAC real_ss [PROB_EMPTY, random_variable_def]
573            >> ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_PREIMAGE, IN_INTER, IN_SING, NOT_IN_EMPTY]
574            >> METIS_TAC [PAIR])
575   >> POP_ORW >> ASM_SIMP_TAC real_ss [REAL_SUM_IMAGE_0]
576   >> `INJ (\x. (x,x)) (IMAGE X (p_space p)) (IMAGE (\x. (x,x)) (IMAGE X (p_space p)))`
577        by RW_TAC std_ss [INJ_DEF, IN_IMAGE]
578   >> ASM_SIMP_TAC std_ss [IMAGE_FINITE, REAL_SUM_IMAGE_IMAGE, o_DEF]
579   >> `~SIGMA (\x. distribution p X {x} * logr b (distribution p X {x})) (IMAGE X (p_space p)) =
580       ~1 * SIGMA (\x. distribution p X {x} * logr b (distribution p X {x})) (IMAGE X (p_space p))`
581        by REAL_ARITH_TAC
582   >> POP_ORW
583   >> ASM_SIMP_TAC std_ss [GSYM REAL_SUM_IMAGE_CMUL, IMAGE_FINITE]
584   >> Suff `(\x.
585         joint_distribution p X X {(x,x)} *
586         logr b (joint_distribution p X X {(x,x)} / (distribution p X {x} * distribution p X {x}))) =
587        (\x. ~1 * (distribution p X {x} * logr b (distribution p X {x})))`
588   >- RW_TAC std_ss []
589   >> RW_TAC std_ss [FUN_EQ_THM, joint_distribution_def, distribution_def]
590   >> `PREIMAGE (\x. (X x,X x)) {(x,x)} INTER p_space p =
591                PREIMAGE X {x} INTER p_space p`
592                by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_SING, IN_CROSS, INTER_IDEMPOT]
593                    >> METIS_TAC [PAIR, FST, SND, PAIR_EQ])
594   >> POP_ORW
595   >> RW_TAC real_ss []
596   >> Cases_on `prob p (PREIMAGE X {x} INTER p_space p) = 0`
597   >- RW_TAC real_ss []
598   >> `0 < prob p (PREIMAGE X {x} INTER p_space p)`
599        by METIS_TAC [REAL_LT_LE, PROB_POSITIVE, random_variable_def, SUBSET_DEF, IN_POW, IN_INTER]
600   >> Suff `prob p (PREIMAGE X {x} INTER p_space p) /
601       (prob p (PREIMAGE X {x} INTER p_space p) *
602        prob p (PREIMAGE X {x} INTER p_space p)) =
603        inv (prob p (PREIMAGE X {x} INTER p_space p))`
604   >- RW_TAC std_ss [REAL_NEG_RMUL, logr_inv]
605   >> RW_TAC std_ss [real_div, REAL_INV_MUL]
606   >> RW_TAC real_ss [REAL_MUL_RINV, REAL_MUL_ASSOC]);
607
608val finite_mutual_information_reduce2 = store_thm
609  ("finite_mutual_information_reduce2",
610   ``!b p s1 s2 X Y Z. (POW (p_space p) = events p) /\
611             random_variable X p (IMAGE X (p_space p), POW (IMAGE X (p_space p))) /\
612             random_variable Y p (IMAGE Y (p_space p), POW (IMAGE Y (p_space p))) /\
613             random_variable Z p (IMAGE Z (p_space p), POW (IMAGE Z (p_space p))) /\
614
615                    FINITE (p_space p) ==>
616        (mutual_information b p (IMAGE X (p_space p), POW (IMAGE X (p_space p)))
617                                (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p),
618                                 POW (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p))) X (\x. (Y x,Z x)) =
619         SIGMA (\(x,(y,z)). (joint_distribution p X (\x. (Y x,Z x)) {(x,(y,z))}) *
620                         logr b ((joint_distribution p X (\x. (Y x,Z x)) {(x,(y,z))})/
621                                 ((distribution p X {x})*(distribution p (\x. (Y x,Z x)) {(y,z)}))))
622               ((IMAGE X (p_space p)) CROSS (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p))))``,
623   RW_TAC std_ss [mutual_information_def, KL_divergence_def, space_def, subsets_def]
624   >> `FINITE (IMAGE X (p_space p)) /\ FINITE (IMAGE Y (p_space p)) /\ FINITE (IMAGE Z (p_space p)) /\
625       FINITE (IMAGE (\x. (Y x,Z x)) (p_space p))`
626        by RW_TAC std_ss [IMAGE_FINITE]
627   >> Q.ABBREV_TAC `s1 = IMAGE X (p_space p)`
628   >> Q.ABBREV_TAC `s2 = (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p))`
629   >> `FINITE s2` by METIS_TAC [FINITE_CROSS]
630   >> Know `prod_space = (s1 CROSS s2, POW (s1 CROSS s2),
631                     prod_measure (s1, POW s1, distribution p X)
632                                  (s2, POW s2, distribution p (\x. (Y x,Z x))))`
633   >- ( Q.UNABBREV_TAC `prod_space`
634        >> RW_TAC std_ss [prod_measure_space_def, m_space_def, subsets_def, EXTENSION, subsets_def,
635                          sigma_def, prod_sets_def, IN_POW, IN_BIGINTER, measurable_sets_def,
636                          SUBSET_DEF, IN_CROSS, GSPECIFICATION]
637        >> RW_TAC std_ss [GSYM EXTENSION]
638        >> EQ_TAC
639        >- (RW_TAC std_ss []
640            >> Q.PAT_X_ASSUM `!s. P ==> x IN s` (MP_TAC o Q.SPEC `POW (s1 CROSS s2)`)
641            >> RW_TAC std_ss [IN_POW, SUBSET_DEF, IN_CROSS, POW_SIGMA_ALGEBRA]
642            >> Suff `(!x''. (?x'. (x'',T) = (\(s,t). (s CROSS t,
643                        (!x. x IN s ==> x IN s1) /\ !x. x IN t ==> x IN s2))
644                        x') ==> !x. x IN x'' ==> FST x IN s1 /\ SND x IN s2)` >- METIS_TAC []
645            >> RW_TAC std_ss []
646            >> (MP_TAC o Q.ISPEC `(x''' :('d -> bool) # ('e # 'f -> bool))`) pair_CASES
647            >> STRIP_TAC >> FULL_SIMP_TAC std_ss [] >> METIS_TAC [IN_CROSS])
648
649        >> RW_TAC std_ss []
650        >> `x = BIGUNION (IMAGE (\a. {a}) x)`
651                by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_BIGUNION_IMAGE, IN_SING])
652        >> POP_ORW
653        >> FULL_SIMP_TAC std_ss [SIGMA_ALGEBRA, subsets_def]
654        >> POP_ASSUM MATCH_MP_TAC
655        >> CONJ_TAC
656        >- (MATCH_MP_TAC FINITE_COUNTABLE >> MATCH_MP_TAC IMAGE_FINITE
657                >> (MP_TAC o Q.ISPEC `(s1 :'d -> bool) CROSS (s2 :'e # 'f -> bool)`) SUBSET_FINITE
658                >> RW_TAC std_ss [FINITE_CROSS]
659                >> POP_ASSUM MATCH_MP_TAC
660                >> METIS_TAC [SUBSET_DEF, IN_CROSS])
661        >> RW_TAC std_ss [SUBSET_DEF, IN_IMAGE, IN_SING]
662        >> Q.PAT_X_ASSUM `!x. Q ==> x IN P` MATCH_MP_TAC
663        >> Q.EXISTS_TAC `({FST a}, {SND a})` >> RW_TAC std_ss [PAIR_EQ, IN_SING]
664        >> ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_CROSS, IN_SING]
665        >> METIS_TAC [PAIR_EQ, PAIR, FST, SND] )
666   >> DISCH_TAC
667   >> POP_ORW
668   >> RW_TAC std_ss [PSPACE, EVENTS]
669   >> `random_variable (\x. (Y x, Z x)) p (s2, POW(s2))`
670        by (RW_TAC std_ss [random_variable_def]
671            >- FULL_SIMP_TAC std_ss [random_variable_def]
672            >> FULL_SIMP_TAC std_ss [IN_MEASURABLE, random_variable_def, POW_SIGMA_ALGEBRA, IN_FUNSET,
673                                     space_def, subsets_def, IN_POW]
674            >> CONJ_TAC >- (Q.UNABBREV_TAC `s2` >> RW_TAC std_ss [IN_IMAGE, IN_CROSS] >> METIS_TAC [])
675            >> METIS_TAC [SUBSET_DEF, IN_INTER, IN_POW])
676   >> (MP_TAC o Q.SPECL [`p`, `s1`, `s2`, `X`, `(\x. (Y x,Z x))`] o
677        INST_TYPE [``:'c`` |-> ``:'e#'f``, ``:'b``|->``:'d``]) finite_marginal_product_space_POW2
678   >> ASM_SIMP_TAC std_ss []
679   >> STRIP_TAC
680   >> (MP_TAC o Q.SPEC `(s1 CROSS s2,POW (s1 CROSS s2),joint_distribution p X (\x. (Y x,Z x)))`
681        o INST_TYPE [``:'a`` |-> ``:'d # 'e # 'f``]) finite_space_POW_integral_reduce
682   >> ASM_SIMP_TAC std_ss [m_space_def, measurable_sets_def, FINITE_CROSS]
683   >> STRIP_TAC >> POP_ASSUM (K ALL_TAC)
684   >> `FINITE (s1 CROSS s2)`
685        by RW_TAC std_ss [FINITE_CROSS]
686   >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `(s1 :'d -> bool)
687                        CROSS (s2 :'e # 'f -> bool)`) REAL_SUM_IMAGE_IN_IF]
688   >> Suff `(\x.
689     (if x IN s1 CROSS s2 then
690        (\x.
691           logr b
692             (RN_deriv
693                (s1 CROSS s2,POW (s1 CROSS s2),
694                 prob
695                   (s1 CROSS s2,POW (s1 CROSS s2),
696                    prod_measure (s1,POW s1,distribution p X)
697                      (s2,POW s2,distribution p (\x. (Y x,Z x)))))
698                (joint_distribution p X (\x. (Y x,Z x))) x) *
699           measure
700             (s1 CROSS s2,POW (s1 CROSS s2),
701              joint_distribution p X (\x. (Y x,Z x))) {x}) x
702      else
703        0)) =
704        (\x.
705     (if x IN s1 CROSS s2 then
706        (\(x,y,z).
707           joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} *
708           logr b
709             (joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} /
710              (distribution p X {x} *
711               distribution p (\x. (Y x,Z x)) {(y,z)}))) x
712      else
713        0))`
714   >- RW_TAC std_ss []
715   >> RW_TAC std_ss [FUN_EQ_THM]
716   >> RW_TAC std_ss [PROB, measure_def]
717   >> `x = (FST x, FST(SND x), SND(SND x))` by RW_TAC std_ss [PAIR]
718   >> POP_ORW
719   >> RW_TAC std_ss []
720   >> Cases_on `joint_distribution p X (\x. (Y x,Z x)) {x} = 0`
721   >- RW_TAC real_ss []
722   >> RW_TAC std_ss [REAL_MUL_COMM, REAL_EQ_LMUL]
723   >> Suff `(RN_deriv
724     (s1 CROSS s2,POW (s1 CROSS s2),
725      prod_measure (s1,POW s1,distribution p X)
726        (s2,POW s2,distribution p (\x. (Y x,Z x))))
727     (joint_distribution p X (\x. (Y x,Z x))) x) =
728        (joint_distribution p X (\x. (Y x,Z x)) {x} /
729   (distribution p X {FST x} * distribution p (\x. (Y x,Z x)) {SND x}))`
730   >- RW_TAC std_ss []
731   >> RW_TAC std_ss [RN_deriv_def]
732   >> Suff `(\f. f x = joint_distribution p X (\x. (Y x,Z x)) {x} /
733(distribution p X {FST x} * distribution p (\x. (Y x,Z x)) {SND x}))
734        (@f.
735   measure_space
736     (s1 CROSS s2,POW (s1 CROSS s2),
737      prod_measure (s1,POW s1,distribution p X)
738        (s2,POW s2,distribution p (\x. (Y x,Z x)))) /\
739   measure_space
740     (m_space
741        (s1 CROSS s2,POW (s1 CROSS s2),
742         prod_measure (s1,POW s1,distribution p X)
743           (s2,POW s2,distribution p (\x. (Y x,Z x)))),
744      measurable_sets
745        (s1 CROSS s2,POW (s1 CROSS s2),
746         prod_measure (s1,POW s1,distribution p X)
747           (s2,POW s2,distribution p (\x. (Y x,Z x)))),
748      joint_distribution p X (\x. (Y x,Z x))) /\
749   f IN
750   borel_measurable
751     (m_space
752        (s1 CROSS s2,POW (s1 CROSS s2),
753         prod_measure (s1,POW s1,distribution p X)
754           (s2,POW s2,distribution p (\x. (Y x,Z x)))),
755      measurable_sets
756        (s1 CROSS s2,POW (s1 CROSS s2),
757         prod_measure (s1,POW s1,distribution p X)
758           (s2,POW s2,distribution p (\x. (Y x,Z x))))) /\
759   !a.
760     a IN
761     measurable_sets
762       (s1 CROSS s2,POW (s1 CROSS s2),
763        prod_measure (s1,POW s1,distribution p X)
764          (s2,POW s2,distribution p (\x. (Y x,Z x)))) ==>
765     (integral
766        (s1 CROSS s2,POW (s1 CROSS s2),
767         prod_measure (s1,POW s1,distribution p X)
768           (s2,POW s2,distribution p (\x. (Y x,Z x))))
769        (\x. f x * indicator_fn a x) =
770      joint_distribution p X (\x. (Y x,Z x)) a))`
771   >- RW_TAC std_ss []
772   >> MATCH_MP_TAC SELECT_ELIM_THM
773   >> `measure_space (s1 CROSS s2, POW (s1 CROSS s2),
774                   prod_measure (s1,POW s1,distribution p X)
775          (s2,POW s2,distribution p (\x. (Y x,Z x))))`
776        by METIS_TAC [measurable_sets_def, distribution_prob_space, space_def, subsets_def, m_space_def,
777                      prob_space_def, measure_space_finite_prod_measure_POW2]
778   >> RW_TAC std_ss [measurable_sets_def, m_space_def]
779   >- (Q.EXISTS_TAC `\x'. joint_distribution p X (\x. (Y x,Z x)) {x'} /
780                        ((distribution p X {FST x'})*(distribution p (\x. (Y x,Z x)) {SND x'}))`
781       >> STRONG_CONJ_TAC
782       >- (`(s1 CROSS s2,POW (s1 CROSS s2)) =
783            (m_space (s1 CROSS s2,POW (s1 CROSS s2),
784          prod_measure (s1,POW s1,distribution p X)
785            (s2,POW s2,distribution p (\x. (Y x,Z x)))),
786             measurable_sets (s1 CROSS s2,POW (s1 CROSS s2),
787          prod_measure (s1,POW s1,distribution p X)
788            (s2,POW s2,distribution p (\x. (Y x,Z x)))))` by RW_TAC std_ss [measurable_sets_def, m_space_def]
789            >> POP_ORW
790            >> ASM_SIMP_TAC std_ss [borel_measurable_le_iff, IN_POW, SUBSET_DEF, GSPECIFICATION, measurable_sets_def, m_space_def])
791       >> RW_TAC std_ss [IN_POW]
792       >> (MP_TAC o Q.SPEC `(s1 CROSS s2,POW (s1 CROSS s2),
793          prod_measure (s1,POW s1,distribution p X)
794            (s2,POW s2,distribution p (\x. (Y x,Z x))))`
795        o INST_TYPE [``:'a`` |-> ``:'d # 'e # 'f``]) finite_space_POW_integral_reduce
796       >> ASM_SIMP_TAC std_ss [m_space_def, measurable_sets_def, FINITE_CROSS, measure_def]
797       >> STRIP_TAC >> POP_ASSUM (K ALL_TAC)
798       >> `(s1 CROSS s2) =
799        a UNION ((s1 CROSS s2) DIFF a)`
800                by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_UNION, IN_DIFF] >> METIS_TAC [SUBSET_DEF])
801        >> POP_ORW
802        >> `DISJOINT a ((s1 CROSS s2) DIFF a)`
803                by (RW_TAC std_ss [IN_DISJOINT, IN_DIFF] >> DECIDE_TAC)
804        >> `FINITE a` by METIS_TAC [SUBSET_FINITE, FINITE_CROSS, IMAGE_FINITE]
805        >> RW_TAC std_ss [FINITE_DIFF, REAL_SUM_IMAGE_DISJOINT_UNION]
806        >> `FINITE ((s1 CROSS s2) DIFF a)`
807                by RW_TAC std_ss [FINITE_DIFF]
808        >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `((s1 :'d -> bool) CROSS (s2 :'e # 'f -> bool)) DIFF a`) REAL_SUM_IMAGE_IN_IF]
809        >> `(\x. (if x IN s1 CROSS s2 DIFF a then (\x'.
810           joint_distribution p X (\x. (Y x,Z x)) {x'} /
811           (distribution p X {FST x'} *
812            distribution p (\x. (Y x,Z x)) {SND x'}) *
813           indicator_fn a x' *
814           prod_measure (s1,POW s1,distribution p X)
815             (s2,POW s2,distribution p (\x. (Y x,Z x))) {x'}) x else 0)) = (\x. 0)`
816                by (RW_TAC std_ss [FUN_EQ_THM] >> RW_TAC real_ss [IN_DIFF, indicator_fn_def])
817        >> RW_TAC real_ss [REAL_SUM_IMAGE_0]
818        >> POP_ASSUM (K ALL_TAC)
819        >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `(a :'d # 'e # 'f -> bool)`) REAL_SUM_IMAGE_IN_IF]
820        >> `(\x. (if x IN a then (\x'.
821           joint_distribution p X (\x. (Y x,Z x)) {x'} /
822           (distribution p X {FST x'} *
823            distribution p (\x. (Y x,Z x)) {SND x'}) *
824           indicator_fn a x' *
825           prod_measure (s1,POW s1,distribution p X)
826             (s2,POW s2,distribution p (\x. (Y x,Z x))) {x'}) x else 0)) =
827            (\x. if x IN a then
828                (\x. joint_distribution p X (\x. (Y x,Z x)) {x}) x else 0)`
829                by (RW_TAC std_ss [FUN_EQ_THM] >> RW_TAC real_ss [indicator_fn_def]
830                    >> `{x'} = {FST x'} CROSS {SND x'}`
831                        by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_SING, IN_CROSS]
832                            >> (MP_TAC o Q.ISPEC `(x'' :'d # 'e # 'f)`) pair_CASES
833                            >> (MP_TAC o Q.ISPEC `(x' :'d # 'e # 'f)`) pair_CASES
834                            >> RW_TAC std_ss [])
835                    >> POP_ORW
836                    >> `prod_measure (s1,POW s1,distribution p X) (s2,POW s2,distribution p (\x. (Y x,Z x)))
837                        ({FST x'} CROSS {SND x'}) =
838                        measure (s1,POW s1,distribution p X) {FST x'} *
839                        measure (s2,POW s2,distribution p (\x. (Y x,Z x))) {SND x'}`
840                        by (`measure_space (s1,POW s1,distribution p X) /\
841                             measure_space (s2,POW s2,distribution p (\x. (Y x,Z x)))`
842                                by METIS_TAC [distribution_prob_space, prob_space_def, space_def, subsets_def]
843                            >> `{FST x'} IN measurable_sets (s1,POW s1,distribution p X) /\
844                                {SND x'} IN measurable_sets (s2,POW s2,distribution p (\x. (Y x,Z x)))`
845                                by (RW_TAC std_ss [measurable_sets_def, IN_POW, SUBSET_DEF, IN_SING]
846                                    >> FULL_SIMP_TAC std_ss [SUBSET_DEF, IN_CROSS])
847                            >> METIS_TAC [finite_POW_prod_measure_reduce, m_space_def, measurable_sets_def])
848                    >> POP_ORW
849                    >> RW_TAC std_ss [measure_def]
850                    >> Cases_on `(distribution p X {FST x'} * distribution p (\x. (Y x,Z x)) {SND x'}) = 0`
851                    >- (Suff `joint_distribution p X (\x. (Y x,Z x)) ({FST x'} CROSS {SND x'}) = 0`
852                        >- RW_TAC real_ss []
853                        >> `PREIMAGE X {FST x'} INTER p_space p IN events p`
854                                by (Q.UNABBREV_TAC `s1`
855                                    >> FULL_SIMP_TAC std_ss [random_variable_def, IN_MEASURABLE, space_def, subsets_def,
856                                                          IN_POW]
857                                    >> Suff `{FST x'} SUBSET IMAGE X (p_space p)` >- METIS_TAC []
858                                    >> FULL_SIMP_TAC std_ss [SUBSET_DEF, IN_SING, IN_CROSS])
859                        >> `PREIMAGE (\x. (Y x,Z x)) {SND x'} INTER p_space p IN events p`
860                                by (Q.UNABBREV_TAC `s2`
861                                    >> FULL_SIMP_TAC std_ss [random_variable_def, IN_MEASURABLE, space_def, subsets_def,
862                                                          IN_POW]
863                                    >> Suff `{SND x'} SUBSET IMAGE Y (p_space p) CROSS IMAGE Z (p_space p)` >- METIS_TAC []
864                                    >> FULL_SIMP_TAC std_ss [SUBSET_DEF, IN_SING, IN_CROSS])
865                        >> `(PREIMAGE X {FST x'} INTER PREIMAGE (\x. (Y x,Z x)) {SND x'} INTER p_space p) IN events p`
866                                by (`PREIMAGE X {FST x'} INTER PREIMAGE (\x. (Y x,Z x)) {SND x'} INTER p_space p =
867                                    (PREIMAGE X {FST x'} INTER p_space p) INTER
868                                    (PREIMAGE (\x. (Y x,Z x)) {SND x'} INTER p_space p)`
869                                        by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER] >> DECIDE_TAC)
870                                    >> POP_ORW
871                                    >> FULL_SIMP_TAC std_ss [random_variable_def, prob_space_def, measure_space_def,
872                                                             events_def]
873                                    >> METIS_TAC [sigma_algebra_def, ALGEBRA_INTER, space_def, subsets_def])
874                        >> FULL_SIMP_TAC std_ss [REAL_ENTIRE, distribution_def, joint_distribution_def]
875                        >- (`(PREIMAGE (\x. (X x,Y x,Z x)) ({FST x'} CROSS {SND x'}) INTER p_space p) =
876                            (PREIMAGE X {FST x'} INTER PREIMAGE (\x. (Y x,Z x)) {SND x'} INTER p_space p)`
877                                by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_CROSS, IN_SING])
878                            >> POP_ORW
879                            >> RW_TAC std_ss [GSYM REAL_LE_ANTISYM]
880                            >- (Suff `(PREIMAGE X {FST x'} INTER PREIMAGE (\x. (Y x,Z x)) {SND x'} INTER p_space p) SUBSET
881                                      (PREIMAGE X {FST x'} INTER p_space p)`
882                                >- METIS_TAC [REAL_LE_TRANS, REAL_LE_REFL, PROB_INCREASING, random_variable_def]
883                                >> RW_TAC std_ss [SUBSET_DEF, IN_INTER])
884                            >> METIS_TAC [random_variable_def, PROB_POSITIVE])
885                        >> `(PREIMAGE (\x. (X x,Y x,Z x)) ({FST x'} CROSS {SND x'}) INTER p_space p) =
886                            (PREIMAGE X {FST x'} INTER PREIMAGE (\x. (Y x,Z x)) {SND x'} INTER p_space p)`
887                                by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_CROSS, IN_SING])
888                        >> POP_ORW
889                        >> RW_TAC std_ss [GSYM REAL_LE_ANTISYM]
890                        >- (Suff `(PREIMAGE X {FST x'} INTER PREIMAGE (\x. (Y x,Z x)) {SND x'} INTER p_space p) SUBSET
891                                  (PREIMAGE (\x. (Y x,Z x)) {SND x'} INTER p_space p)`
892                            >- METIS_TAC [REAL_LE_TRANS, REAL_LE_REFL, PROB_INCREASING, random_variable_def]
893                            >> RW_TAC std_ss [SUBSET_DEF, IN_INTER])
894                        >> METIS_TAC [random_variable_def, PROB_POSITIVE])
895                    >> RW_TAC std_ss [real_div, measure_def]
896                    >> `joint_distribution p X (\x. (Y x,Z x)) ({FST x'} CROSS {SND x'}) *
897                        inv (distribution p X {FST x'} * distribution p (\x. (Y x,Z x)) {SND x'}) *
898                        (distribution p X {FST x'} * distribution p (\x. (Y x,Z x)) {SND x'}) =
899                        (inv (distribution p X {FST x'} * distribution p (\x. (Y x,Z x)) {SND x'}) *
900                        (distribution p X {FST x'} * distribution p (\x. (Y x,Z x)) {SND x'})) *
901                        joint_distribution p X (\x. (Y x,Z x)) ({FST x'} CROSS {SND x'})`
902                        by REAL_ARITH_TAC
903                    >> POP_ORW
904                    >> RW_TAC real_ss [REAL_MUL_LINV])
905        >> POP_ORW
906        >> ASM_SIMP_TAC std_ss [GSYM REAL_SUM_IMAGE_IN_IF]
907        >> Q.PAT_X_ASSUM `a SUBSET s1 CROSS s2` MP_TAC
908        >> CONV_TAC (UNBETA_CONV ``(a :'d # 'e # 'f -> bool)``)
909        >> Q.PAT_X_ASSUM `FINITE a` MP_TAC
910        >> Q.SPEC_TAC (`a`,`a`)
911        >> MATCH_MP_TAC FINITE_INDUCT
912        >> FULL_SIMP_TAC std_ss [random_variable_def]
913        >> RW_TAC std_ss [REAL_SUM_IMAGE_THM, PREIMAGE_EMPTY, INTER_EMPTY, PROB_EMPTY, DELETE_NON_ELEMENT,
914                          joint_distribution_def]
915        >> `(PREIMAGE (\x. (X x,Y x,Z x)) (e INSERT s) INTER p_space p) =
916            (PREIMAGE (\x. (X x,Y x,Z x)) {e} INTER p_space p) UNION
917            (PREIMAGE (\x. (X x,Y x,Z x)) s INTER p_space p)`
918                by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_UNION, IN_INTER, IN_PREIMAGE, IN_INSERT, NOT_IN_EMPTY]
919                    >> FULL_SIMP_TAC std_ss [GSYM DELETE_NON_ELEMENT, SUBSET_DEF, IN_INSERT, IN_CROSS, IN_IMAGE]
920                    >> METIS_TAC [FST, SND])
921        >> POP_ORW
922        >> `DISJOINT (PREIMAGE (\x. (X x,Y x,Z x)) {e} INTER p_space p)
923                     (PREIMAGE (\x. (X x,Y x,Z x)) s INTER p_space p)`
924                by (RW_TAC std_ss [IN_DISJOINT, IN_INTER, IN_PREIMAGE, IN_SING]
925                    >> FULL_SIMP_TAC std_ss [GSYM DELETE_NON_ELEMENT]
926                    >> METIS_TAC [])
927        >> `prob p (PREIMAGE (\x. (X x,Y x,Z x)) {e} INTER p_space p UNION
928                    PREIMAGE (\x. (X x,Y x,Z x)) s INTER p_space p) =
929            prob p (PREIMAGE (\x. (X x,Y x,Z x)) {e} INTER p_space p) +
930            prob p (PREIMAGE (\x. (X x,Y x,Z x)) s INTER p_space p)`
931                by (MATCH_MP_TAC PROB_ADDITIVE
932                    >> RW_TAC std_ss []
933                    >> METIS_TAC [SUBSET_DEF, IN_INTER, IN_POW])
934        >> POP_ORW
935        >> FULL_SIMP_TAC std_ss [SUBSET_DEF, IN_INSERT])
936   >> POP_ASSUM (MP_TAC o Q.SPEC `{x}`)
937   >> RW_TAC std_ss [IN_POW, SUBSET_DEF, IN_SING, finite_space_POW_integral_reduce,
938                     FINITE_CROSS, IMAGE_FINITE, measurable_sets_def, m_space_def,
939                     measure_def]
940   >> POP_ASSUM MP_TAC
941   >> `(s1 CROSS s2) =
942        x INSERT ((s1 CROSS s2) DELETE x)`
943        by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INSERT, IN_DELETE] >> METIS_TAC [])
944   >> POP_ORW
945   >> ASM_SIMP_TAC std_ss [REAL_SUM_IMAGE_THM, FINITE_CROSS, FINITE_DELETE, DELETE_DELETE]
946   >> `FINITE (s1 CROSS s2 DELETE x)`
947        by RW_TAC std_ss [FINITE_DELETE, FINITE_CROSS]
948   >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `((s1 :'d -> bool) CROSS
949                        (s2 :'e # 'f -> bool) DELETE (x :'d # 'e # 'f))`) REAL_SUM_IMAGE_IN_IF]
950   >> `(\x''.
951      (if x'' IN s1 CROSS s2 DELETE x then
952         (\x''.
953            x' x'' * indicator_fn {x} x'' *
954            prod_measure (s1,POW s1,distribution p X)
955              (s2,POW s2,distribution p (\x. (Y x,Z x))) {x''}) x''
956       else
957         0)) = (\x'. 0)`
958        by (RW_TAC std_ss [FUN_EQ_THM] >> RW_TAC real_ss [indicator_fn_def, IN_SING, IN_DELETE])
959   >> POP_ORW
960   >> ASM_SIMP_TAC real_ss [REAL_SUM_IMAGE_0, indicator_fn_def, IN_SING]
961   >> `{(x :'d # 'e # 'f)} = {FST x} CROSS {SND x}`
962                by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_SING, IN_CROSS]
963                            >> (MP_TAC o Q.ISPEC `(x'' :'d # 'e # 'f)`) pair_CASES
964                            >> (MP_TAC o Q.ISPEC `(x :'d # 'e # 'f)`) pair_CASES
965                            >> RW_TAC std_ss [])
966   >> FULL_SIMP_TAC std_ss [] >> POP_ASSUM (K ALL_TAC)
967   >> `prod_measure (s1,POW s1,distribution p X) (s2,POW s2,distribution p (\x. (Y x,Z x)))
968                        ({FST x} CROSS {SND x}) =
969                        measure (s1,POW s1,distribution p X) {FST x} *
970                        measure (s2,POW s2,distribution p (\x. (Y x,Z x))) {SND x}`
971                        by (`measure_space (s1,POW s1,distribution p X) /\
972                             measure_space (s2,POW s2,distribution p (\x. (Y x,Z x)))`
973                                by METIS_TAC [distribution_prob_space, prob_space_def, space_def, subsets_def]
974                            >> `{FST x} IN measurable_sets (s1,POW s1,distribution p X) /\
975                                {SND x} IN measurable_sets (s2,POW s2,distribution p (\x. (Y x,Z x)))`
976                                by (RW_TAC std_ss [measurable_sets_def, IN_POW, SUBSET_DEF, IN_SING]
977                                    >> FULL_SIMP_TAC std_ss [SUBSET_DEF, IN_CROSS])
978                            >> METIS_TAC [finite_POW_prod_measure_reduce, m_space_def, measurable_sets_def])
979   >> POP_ORW
980   >> `x = (FST x, SND x)` by RW_TAC std_ss [PAIR]
981   >> POP_ORW
982   >> FULL_SIMP_TAC std_ss [measure_def, joint_distribution_def, distribution_def]
983   >> `PREIMAGE (\x. (X x,Y x,Z x)) ({FST x} CROSS {SND x}) INTER
984        p_space p =
985        PREIMAGE X {FST x} INTER PREIMAGE (\x. (Y x,Z x)) {SND x} INTER p_space p`
986        by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_SING, IN_CROSS])
987   >> FULL_SIMP_TAC std_ss [] >> POP_ASSUM (K ALL_TAC)
988   >> POP_ORW
989   >> Suff `0 < (prob p (PREIMAGE X {FST x} INTER p_space p) *
990            prob p (PREIMAGE (\x. (Y x,Z x)) {SND x} INTER p_space p))`
991   >- RW_TAC real_ss [REAL_EQ_RDIV_EQ]
992   >> MATCH_MP_TAC REAL_LT_MUL
993   >> SIMP_TAC std_ss [REAL_LT_LE]
994   >> `PREIMAGE X {FST x} INTER p_space p IN events p`
995                by (Q.UNABBREV_TAC `s1`
996                    >> FULL_SIMP_TAC std_ss [random_variable_def, IN_MEASURABLE, space_def, subsets_def,
997                                                          IN_POW]
998                    >> Suff `{FST x} SUBSET IMAGE X (p_space p)` >- METIS_TAC []
999                    >> FULL_SIMP_TAC std_ss [SUBSET_DEF, IN_SING, IN_CROSS])
1000   >> `PREIMAGE (\x. (Y x,Z x)) {SND x} INTER p_space p IN events p`
1001                                by (Q.UNABBREV_TAC `s2`
1002                                    >> FULL_SIMP_TAC std_ss [random_variable_def, IN_MEASURABLE, space_def, subsets_def,
1003                                                          IN_POW]
1004                                    >> Suff `{SND x} SUBSET IMAGE Y (p_space p) CROSS IMAGE Z (p_space p)` >- METIS_TAC []
1005                                    >> FULL_SIMP_TAC std_ss [SUBSET_DEF, IN_SING, IN_CROSS])
1006   >> `(PREIMAGE X {FST x} INTER PREIMAGE (\x. (Y x,Z x)) {SND x} INTER p_space p) IN events p`
1007                                by (`PREIMAGE X {FST x} INTER PREIMAGE (\x. (Y x,Z x)) {SND x} INTER p_space p =
1008                                    (PREIMAGE X {FST x} INTER p_space p) INTER
1009                                    (PREIMAGE (\x. (Y x,Z x)) {SND x} INTER p_space p)`
1010                                        by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER] >> DECIDE_TAC)
1011                                    >> POP_ORW
1012                                    >> FULL_SIMP_TAC std_ss [random_variable_def, prob_space_def, measure_space_def,
1013                                                             events_def]
1014                                    >> METIS_TAC [sigma_algebra_def, ALGEBRA_INTER, space_def, subsets_def])
1015    >> Suff `~(0 = prob p (PREIMAGE X {FST x} INTER p_space p)) /\
1016                     ~(0 = prob p (PREIMAGE (\x. (Y x,Z x)) {SND x} INTER p_space p))`
1017    >- METIS_TAC [random_variable_def, PROB_POSITIVE]
1018    >> `0 <= prob p
1019              (PREIMAGE X {FST x} INTER PREIMAGE (\x. (Y x,Z x)) {SND x} INTER
1020               p_space p)`
1021                by METIS_TAC [random_variable_def, PROB_POSITIVE]
1022    >> (CONJ_TAC >> SPOSE_NOT_THEN STRIP_ASSUME_TAC)
1023    >- (Suff `prob p (PREIMAGE (X :'a -> 'd) {FST x} INTER PREIMAGE ((\x. (Y x,Z x)) :'a -> 'e # 'f) {SND x} INTER p_space p) <=
1024              prob p (PREIMAGE (X :'a -> 'd) {FST x} INTER p_space p)`
1025                >- METIS_TAC [REAL_LE_ANTISYM]
1026                >> MATCH_MP_TAC PROB_INCREASING
1027                >> FULL_SIMP_TAC std_ss [random_variable_def, SUBSET_DEF, IN_INTER])
1028    >> Suff `prob p (PREIMAGE X {FST x} INTER PREIMAGE (\x. (Y x,Z x)) {SND x} INTER p_space p) <=
1029                      prob p (PREIMAGE (\x. (Y x,Z x)) {SND x} INTER p_space p)`
1030    >- METIS_TAC [REAL_LE_ANTISYM]
1031    >> MATCH_MP_TAC PROB_INCREASING
1032    >> FULL_SIMP_TAC std_ss [random_variable_def, SUBSET_DEF, IN_INTER]);
1033
1034val finite_conditional_mutual_information_reduce = store_thm
1035  ("finite_conditional_mutual_information_reduce",
1036   ``!b p X Y Z. (POW (p_space p) = events p) /\
1037               random_variable X p (IMAGE X (p_space p), POW (IMAGE X (p_space p))) /\
1038               random_variable Y p (IMAGE Y (p_space p), POW (IMAGE Y (p_space p))) /\
1039               random_variable Z p (IMAGE Z (p_space p), POW (IMAGE Z (p_space p))) /\
1040               FINITE (p_space p) ==>
1041        (conditional_mutual_information b p (IMAGE X (p_space p), POW (IMAGE X (p_space p)))
1042                                            (IMAGE Y (p_space p), POW (IMAGE Y (p_space p)))
1043                                            (IMAGE Z (p_space p), POW (IMAGE Z (p_space p)))
1044                                             X Y Z =
1045        ~ SIGMA (\(x,z). joint_distribution p X Z {(x,z)} *
1046                         logr b (joint_distribution p X Z {(x,z)} / distribution p Z {z}))
1047                (IMAGE X (p_space p) CROSS IMAGE Z (p_space p))
1048        -
1049        ~ SIGMA (\(x,(y,z)). joint_distribution p X (\x. (Y x, Z x)) {(x,(y,z))} *
1050                           logr b (joint_distribution p X (\x. (Y x, Z x)) {(x,(y,z))}/
1051                                   distribution p (\x. (Y x, Z x)) {(y,z)}))
1052                ((IMAGE X (p_space p)) CROSS (IMAGE (\x. (Y x, Z x)) (p_space p))))``,
1053   REPEAT STRIP_TAC
1054   >> `random_variable (\x. (Y x, Z x)) p (IMAGE (\x. (Y x, Z x)) (p_space p), POW (IMAGE (\x. (Y x, Z x)) (p_space p)))`
1055        by (RW_TAC std_ss [random_variable_def]
1056            >- FULL_SIMP_TAC std_ss [random_variable_def]
1057            >> FULL_SIMP_TAC std_ss [IN_MEASURABLE, random_variable_def, POW_SIGMA_ALGEBRA, IN_FUNSET, space_def,
1058                                     subsets_def, IN_POW]
1059            >> CONJ_TAC >- (RW_TAC std_ss [IN_IMAGE] >> METIS_TAC [])
1060            >> METIS_TAC [SUBSET_DEF, IN_INTER, IN_POW])
1061   >> RW_TAC std_ss [conditional_mutual_information_def, finite_mutual_information_reduce, space_def, subsets_def]
1062   >> Know `prod_space =
1063       ((IMAGE Y (p_space p)) CROSS (IMAGE Z (p_space p)), POW ((IMAGE Y (p_space p)) CROSS (IMAGE Z (p_space p))),
1064        prod_measure (IMAGE Y (p_space p), POW (IMAGE Y (p_space p)), distribution p Y)
1065                     (IMAGE Z (p_space p), POW (IMAGE Z (p_space p)), distribution p Z))`
1066   >- ( Q.UNABBREV_TAC `prod_space`
1067        >> Q.ABBREV_TAC `s1 = IMAGE Y (p_space p)` >> Q.ABBREV_TAC `s2 = IMAGE Z (p_space p)`
1068        >> RW_TAC std_ss [prod_measure_space_def, m_space_def, subsets_def, EXTENSION, subsets_def,
1069                          sigma_def, prod_sets_def, IN_POW, IN_BIGINTER, measurable_sets_def,
1070                          SUBSET_DEF, IN_CROSS, GSPECIFICATION]
1071        >> RW_TAC std_ss [GSYM EXTENSION]
1072        >> EQ_TAC
1073        >- (ASM_SIMP_TAC std_ss [] >> STRIP_TAC
1074            >> Q.PAT_X_ASSUM `!s. Q ==> x IN P` (MP_TAC o Q.SPEC `POW (s1 CROSS s2)`)
1075
1076            >> ASM_SIMP_TAC std_ss [IN_POW, SUBSET_DEF, IN_CROSS, POW_SIGMA_ALGEBRA]
1077            >> STRIP_TAC >> POP_ASSUM MATCH_MP_TAC
1078            >> NTAC 2 STRIP_TAC >> POP_ASSUM MP_TAC
1079            >> `(x' :('c -> bool) # ('d -> bool)) = (FST x', SND x')` by RW_TAC std_ss [PAIR]
1080            >> POP_ORW >> RW_TAC std_ss [IN_CROSS] )
1081        >> RW_TAC std_ss []
1082        >> `x = BIGUNION (IMAGE (\a. {a}) x)`
1083                by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_BIGUNION_IMAGE, IN_SING])
1084        >> POP_ORW
1085        >> FULL_SIMP_TAC std_ss [SIGMA_ALGEBRA, subsets_def]
1086        >> POP_ASSUM MATCH_MP_TAC
1087        >> CONJ_TAC
1088        >- (MATCH_MP_TAC FINITE_COUNTABLE >> MATCH_MP_TAC IMAGE_FINITE
1089                >> (MP_TAC o Q.ISPEC `(s1 :'c -> bool) CROSS (s2 :'d -> bool)`) SUBSET_FINITE
1090                >> Q.UNABBREV_TAC `s1` >> Q.UNABBREV_TAC `s2`
1091                >> RW_TAC std_ss [FINITE_CROSS, IMAGE_FINITE]
1092                >> POP_ASSUM MATCH_MP_TAC
1093                >> METIS_TAC [SUBSET_DEF, IN_CROSS])
1094        >> RW_TAC std_ss [SUBSET_DEF, IN_IMAGE, IN_SING]
1095        >> Q.PAT_X_ASSUM `!x. Q ==> x IN P` MATCH_MP_TAC
1096        >> Q.EXISTS_TAC `({FST a}, {SND a})` >> RW_TAC std_ss [PAIR_EQ, IN_SING]
1097        >> ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_CROSS, IN_SING]
1098        >> METIS_TAC [PAIR_EQ, PAIR, FST, SND])
1099   >> DISCH_TAC
1100   >> RW_TAC std_ss [conditional_mutual_information_def, finite_mutual_information_reduce2,
1101                     PSPACE, EVENTS]
1102   >> `!a. a INTER p_space p IN events p`
1103        by METIS_TAC [IN_INTER, IN_POW, SUBSET_DEF]
1104   >> `!a. 0 <= prob p (a INTER p_space p)`
1105        by METIS_TAC [random_variable_def, PROB_POSITIVE]
1106   >> `!a b. (prob p (b INTER p_space p) = 0) ==>
1107             (prob p (a INTER b INTER p_space p) = 0)`
1108        by (RW_TAC std_ss [] >> ONCE_REWRITE_TAC [GSYM REAL_LE_ANTISYM]
1109            >> RW_TAC std_ss [] >> POP_ASSUM (fn thm => ONCE_REWRITE_TAC [GSYM thm])
1110            >> MATCH_MP_TAC PROB_INCREASING
1111            >> FULL_SIMP_TAC std_ss [random_variable_def, SUBSET_DEF, IN_INTER])
1112   >> `FINITE (IMAGE X (p_space p) CROSS IMAGE Z (p_space p))`
1113                by RW_TAC std_ss [IMAGE_FINITE, FINITE_CROSS]
1114   >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `(IMAGE (X :'a -> 'b) (p_space p)
1115                        CROSS IMAGE (Z :'a -> 'd) (p_space p))`) REAL_SUM_IMAGE_IN_IF]
1116   >> `(\x.
1117         (if x IN IMAGE X (p_space p) CROSS IMAGE Z (p_space p) then
1118            (\(x,y).
1119               joint_distribution p X Z {(x,y)} *
1120               logr b
1121                 (joint_distribution p X Z {(x,y)} / (distribution p X {x} * distribution p Z {y})))
1122              x
1123          else
1124            0)) =
1125        (\x. if x IN IMAGE X (p_space p) CROSS IMAGE Z (p_space p) then
1126                (\(x,z). joint_distribution p X Z {(x,z)} * logr b (joint_distribution p X Z {(x,z)} /
1127                                (distribution p Z {z}))
1128                       - joint_distribution p X Z {(x,z)} * logr b (distribution p X {x})) x
1129                else 0)`
1130        by (RW_TAC std_ss [FUN_EQ_THM, IN_IMAGE, IN_CROSS] >> RW_TAC std_ss []
1131            >> (MP_TAC o Q.ISPEC `x:'b#'d`) pair_CASES >> RW_TAC std_ss []
1132            >> FULL_SIMP_TAC std_ss []
1133            >> RW_TAC std_ss [GSYM REAL_SUB_LDISTRIB]
1134            >> `joint_distribution p X Z {(X x',Z x'')} /
1135                (distribution p X {X x'} * distribution p Z {Z x''}) =
1136                (joint_distribution p X Z {(X x',Z x'')} /
1137                                (distribution p Z {Z x''})) /
1138                (distribution p X {X x'})`
1139                by (RW_TAC std_ss [real_div]
1140                    >> Cases_on `distribution p X {X x'} = 0`
1141                    >- (FULL_SIMP_TAC real_ss [joint_distribution_def, distribution_def]
1142                        >> `PREIMAGE (\x. (X x,Z x)) {(X x',Z x'')} INTER p_space p =
1143                            PREIMAGE X {X x'} INTER PREIMAGE Z {Z x''} INTER p_space p`
1144                                by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_SING])
1145                        >> POP_ORW
1146                        >> `PREIMAGE X {X x'} INTER PREIMAGE Z {Z x''} INTER p_space p =
1147                            PREIMAGE Z {Z x''} INTER PREIMAGE X {X x'} INTER p_space p`
1148                                by METIS_TAC [INTER_ASSOC, INTER_COMM]
1149                        >> POP_ORW
1150                        >> RW_TAC real_ss [])
1151                    >> Cases_on `distribution p Z {Z x''} = 0`
1152                    >- (FULL_SIMP_TAC real_ss [joint_distribution_def, distribution_def]
1153                        >> `PREIMAGE (\x. (X x,Z x)) {(X x',Z x'')} INTER p_space p =
1154                            PREIMAGE X {X x'} INTER PREIMAGE Z {Z x''} INTER p_space p`
1155                                by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_SING])
1156                        >> ASM_SIMP_TAC real_ss [])
1157                    >> RW_TAC real_ss [REAL_INV_MUL]
1158                    >> REAL_ARITH_TAC)
1159            >> POP_ORW
1160            >> Cases_on `distribution p X {X x'} = 0`
1161            >- (FULL_SIMP_TAC real_ss [joint_distribution_def, distribution_def]
1162                >> `PREIMAGE (\x. (X x,Z x)) {(X x',Z x'')} INTER p_space p =
1163                            PREIMAGE X {X x'} INTER PREIMAGE Z {Z x''} INTER p_space p`
1164                                by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_SING])
1165                >> POP_ORW
1166                >> `PREIMAGE X {X x'} INTER PREIMAGE Z {Z x''} INTER p_space p =
1167                    PREIMAGE Z {Z x''} INTER PREIMAGE X {X x'} INTER p_space p`
1168                        by METIS_TAC [INTER_ASSOC, INTER_COMM]
1169                >> POP_ORW
1170                >> RW_TAC real_ss [])
1171            >> Cases_on `joint_distribution p X Z {(X x',Z x'')} / distribution p Z {Z x''} = 0`
1172            >- (FULL_SIMP_TAC real_ss [joint_distribution_def, distribution_def]
1173                >> `PREIMAGE (\x. (X x,Z x)) {(X x',Z x'')} INTER p_space p =
1174                            PREIMAGE X {X x'} INTER PREIMAGE Z {Z x''} INTER p_space p`
1175                                by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_SING])
1176                >> FULL_SIMP_TAC std_ss []
1177                >> Cases_on `prob p (PREIMAGE Z {Z x''} INTER p_space p) = 0`
1178                >- RW_TAC real_ss []
1179                >> `0 < prob p (PREIMAGE Z {Z x''} INTER p_space p)`
1180                        by METIS_TAC [REAL_LT_LE]
1181                >> FULL_SIMP_TAC real_ss [REAL_EQ_LDIV_EQ])
1182            >> `0 < distribution p X {X x'}`
1183                by RW_TAC std_ss [distribution_def, REAL_LT_LE]
1184            >> `0 < joint_distribution p X Z {(X x',Z x'')} / distribution p Z {Z x''}`
1185                by RW_TAC std_ss [REAL_LT_LE, joint_distribution_def, distribution_def, REAL_LE_DIV]
1186            >> RW_TAC std_ss [logr_div])
1187   >> POP_ORW
1188   >> ASM_SIMP_TAC std_ss [GSYM REAL_SUM_IMAGE_IN_IF]
1189   >> RW_TAC std_ss [REAL_SUB_RNEG]
1190   >> `!x y. ~ x + y = y - x`
1191        by REAL_ARITH_TAC
1192   >> POP_ORW
1193   >> `SIGMA
1194      (\x.
1195         (\(x,z).
1196            joint_distribution p X Z {(x,z)} *
1197            logr b (joint_distribution p X Z {(x,z)} / distribution p Z {z}) -
1198            joint_distribution p X Z {(x,z)} * logr b (distribution p X {x})) x)
1199      (IMAGE X (p_space p) CROSS IMAGE Z (p_space p)) =
1200        SIGMA
1201      (\x.
1202         (\(x,z).
1203            joint_distribution p X Z {(x,z)} *
1204            logr b (joint_distribution p X Z {(x,z)} / distribution p Z {z})) x)
1205      (IMAGE X (p_space p) CROSS IMAGE Z (p_space p)) -
1206        SIGMA
1207      (\x.
1208         (\(x,z). joint_distribution p X Z {(x,z)} * logr b (distribution p X {x})) x)
1209      (IMAGE X (p_space p) CROSS IMAGE Z (p_space p))`
1210        by (RW_TAC std_ss [real_sub]
1211            >> `(\x. (\(x,z).
1212                joint_distribution p X Z {(x,z)} *
1213                logr b (joint_distribution p X Z {(x,z)} / distribution p Z {z}) +
1214                ~(joint_distribution p X Z {(x,z)} * logr b (distribution p X {x}))) x) =
1215                (\x. (\(x,z).
1216                joint_distribution p X Z {(x,z)} *
1217                logr b (joint_distribution p X Z {(x,z)} / distribution p Z {z})) x +
1218                (\(x,z). ~(joint_distribution p X Z {(x,z)} * logr b (distribution p X {x}))) x)`
1219                by (RW_TAC std_ss [FUN_EQ_THM] >> RW_TAC std_ss []
1220                    >> (MP_TAC o Q.ISPEC `x:'b#'d`) pair_CASES >> RW_TAC std_ss []
1221                    >> FULL_SIMP_TAC std_ss [])
1222            >> POP_ORW
1223            >> RW_TAC std_ss [REAL_SUM_IMAGE_ADD, FINITE_CROSS, IMAGE_FINITE, REAL_EQ_LADD]
1224            >> `(\x. (\(x,z). ~(joint_distribution p X Z {(x,z)} * logr b (distribution p X {x}))) x) =
1225                (\x. ~1 * (\(x,z). (joint_distribution p X Z {(x,z)} * logr b (distribution p X {x}))) x)`
1226                by (RW_TAC std_ss [FUN_EQ_THM] >> RW_TAC std_ss []
1227                    >> (MP_TAC o Q.ISPEC `x:'b#'d`) pair_CASES >> RW_TAC std_ss []
1228                    >> FULL_SIMP_TAC real_ss [])
1229            >> POP_ORW
1230            >> RW_TAC std_ss [REAL_SUM_IMAGE_CMUL, IMAGE_FINITE, FINITE_CROSS]
1231            >> RW_TAC real_ss [])
1232   >> POP_ORW
1233   >> `!(x:real) y z. x - (y - z) = (x + z) - y`
1234        by REAL_ARITH_TAC
1235   >> POP_ORW
1236   >> Suff `SIGMA
1237  (\(x,y,z).
1238     joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} *
1239     logr b
1240       (joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} /
1241        (distribution p X {x} *
1242         distribution p (\x. (Y x,Z x)) {(y,z)})))
1243  (IMAGE X (p_space p) CROSS
1244   (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p))) +
1245SIGMA
1246  (\x.
1247     (\(x,z).
1248        joint_distribution p X Z {(x,z)} *
1249        logr b (distribution p X {x})) x)
1250  (IMAGE X (p_space p) CROSS IMAGE Z (p_space p)) =
1251        SIGMA
1252  (\(x,y,z).
1253     joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} *
1254     logr b
1255       (joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} /
1256        distribution p (\x. (Y x,Z x)) {(y,z)}))
1257  (IMAGE X (p_space p) CROSS IMAGE (\x. (Y x,Z x)) (p_space p))`
1258   >- RW_TAC std_ss []
1259   >> `FINITE (IMAGE X (p_space p) CROSS IMAGE Z (p_space p))`
1260        by RW_TAC std_ss [FINITE_CROSS, IMAGE_FINITE]
1261   >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `(IMAGE (X :'a -> 'b) (p_space p)
1262                        CROSS IMAGE (Z :'a -> 'd) (p_space p))`) REAL_SUM_IMAGE_IN_IF]
1263   >> `(\x.
1264         (if x IN IMAGE X (p_space p) CROSS IMAGE Z (p_space p) then
1265            (\x.
1266               (\(x,z). joint_distribution p X Z {(x,z)} * logr b (distribution p X {x})) x)
1267              x
1268          else
1269            0)) =
1270        (\x. if x IN IMAGE X (p_space p) CROSS IMAGE Z (p_space p) then
1271                (\x. (\(x,z). SIGMA (\y. joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} *
1272                                         logr b (distribution p X {x})) (IMAGE Y (p_space p))) x) x else 0)`
1273        by (RW_TAC std_ss [FUN_EQ_THM] >> RW_TAC std_ss []
1274            >> (MP_TAC o Q.ISPEC `x:'b#'d`) pair_CASES >> RW_TAC std_ss []
1275            >> RW_TAC std_ss []
1276            >> `(\y. joint_distribution p X (\x. (Y x,Z x)) {(q,y,r)} * logr b (distribution p X {q})) =
1277                (\y. logr b (distribution p X {q}) * (\y. joint_distribution p X (\x. (Y x,Z x)) {(q,y,r)}) y)`
1278                by RW_TAC std_ss [REAL_MUL_COMM]
1279            >> POP_ORW
1280            >> ASM_SIMP_TAC std_ss [IMAGE_FINITE, REAL_SUM_IMAGE_CMUL]
1281            >> Suff `joint_distribution p X Z {(q,r)} =
1282                     SIGMA (\y. joint_distribution p X (\x. (Y x,Z x)) {(q,y,r)})
1283                        (IMAGE Y (p_space p))`
1284            >- RW_TAC std_ss [REAL_MUL_COMM]
1285            >> RW_TAC std_ss [joint_distribution_def]
1286            >> `PREIMAGE (\x. (X x,Z x)) {(q,r)} INTER p_space p =
1287                            PREIMAGE X {q} INTER PREIMAGE Z {r} INTER p_space p`
1288                                by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_SING])
1289            >> POP_ORW
1290            >> `!y. PREIMAGE (\x. (X x,Y x,Z x)) {(q,y,r)} INTER p_space p =
1291                            PREIMAGE X {q} INTER PREIMAGE Y {y} INTER PREIMAGE Z {r} INTER p_space p`
1292                                by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_SING] >> METIS_TAC [])
1293            >> POP_ORW
1294            >> `FINITE (IMAGE Y (p_space p))` by RW_TAC std_ss [IMAGE_FINITE]
1295            >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `IMAGE (Y :'a -> 'c) (p_space p)`) REAL_SUM_IMAGE_IN_IF]
1296            >> (MP_TAC o Q.ISPECL [`(p :('a -> bool) # (('a -> bool) -> bool) # (('a -> bool) -> real))`,
1297                                   `\(y:'c). PREIMAGE (Y:'a->'c) {y}`,
1298                                    `(PREIMAGE (X :'a -> 'b) {(q :'b)} INTER
1299                                        PREIMAGE (Z :'a -> 'd) {(r :'d)} INTER p_space p)`,
1300                                   `IMAGE (Y:'a->'c) (p_space p)`]) PROB_REAL_SUM_IMAGE_FN
1301            >> ASM_SIMP_TAC std_ss [IMAGE_FINITE]
1302            >> `prob_space p` by FULL_SIMP_TAC std_ss [random_variable_def]
1303            >> `(!x. x IN IMAGE Y (p_space p) ==>
1304                        PREIMAGE X {q} INTER PREIMAGE Z {r} INTER p_space p INTER
1305                        PREIMAGE Y {x} IN events p)`
1306                by METIS_TAC [INTER_COMM, INTER_ASSOC]
1307            >> `(!x y. x IN IMAGE Y (p_space p) /\ y IN IMAGE Y (p_space p) /\ ~(x = y) ==>
1308                DISJOINT (PREIMAGE Y {x}) (PREIMAGE Y {y}))`
1309                by (RW_TAC std_ss [IN_IMAGE, IN_DISJOINT, IN_PREIMAGE, IN_SING] >> METIS_TAC [])
1310            >> `(BIGUNION (IMAGE (\y. PREIMAGE Y {y}) (IMAGE Y (p_space p))) INTER p_space p = p_space p)`
1311                by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_UNIV, IN_BIGUNION, IN_IMAGE, IN_PREIMAGE, IN_SING, IN_INTER]
1312                    >> EQ_TAC >- RW_TAC std_ss []
1313                    >> RW_TAC std_ss [] >> Q.EXISTS_TAC `PREIMAGE Y {Y x}`
1314                    >> RW_TAC std_ss [IN_PREIMAGE, IN_SING] >> METIS_TAC [])
1315            >> ASM_SIMP_TAC std_ss []
1316            >> STRIP_TAC >> POP_ASSUM (K ALL_TAC)
1317            >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `IMAGE (Y :'a -> 'c) (p_space p)`) REAL_SUM_IMAGE_IN_IF]
1318            >> Suff `(\x.
1319     (if x IN IMAGE Y (p_space p) then
1320        prob p
1321          (PREIMAGE X {q} INTER PREIMAGE Z {r} INTER p_space p INTER
1322           PREIMAGE Y {x})
1323      else
1324        0)) =
1325                     (\x.
1326     (if x IN IMAGE Y (p_space p) then
1327        prob p
1328          (PREIMAGE X {q} INTER PREIMAGE Y {x} INTER
1329           PREIMAGE Z {r} INTER p_space p)
1330      else
1331        0))`
1332            >- RW_TAC std_ss []
1333            >> RW_TAC std_ss [FUN_EQ_THM]
1334            >> RW_TAC std_ss [IN_IMAGE]
1335            >> Suff `PREIMAGE X {q} INTER PREIMAGE Z {r} INTER p_space p INTER PREIMAGE Y {Y x'} =
1336                     PREIMAGE X {q} INTER PREIMAGE Y {Y x'} INTER PREIMAGE Z {r} INTER p_space p`
1337            >- RW_TAC std_ss []
1338            >> ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_SING] >> DECIDE_TAC)
1339   >> POP_ORW
1340   >> ASM_SIMP_TAC std_ss [GSYM REAL_SUM_IMAGE_IN_IF, FINITE_CROSS, IMAGE_FINITE]
1341   >> `(\x.
1342         (\(x,z).
1343            SIGMA
1344              (\y.
1345                 joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} *
1346                 logr b (distribution p X {x})) (IMAGE Y (p_space p))) x) =
1347        (\x. SIGMA ((\(x,z). (\y.
1348                 joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} *
1349                 logr b (distribution p X {x})) ) x) (IMAGE Y (p_space p)))`
1350        by (RW_TAC std_ss [FUN_EQ_THM] >> (MP_TAC o Q.ISPEC `x:'b#'d`) pair_CASES >> RW_TAC std_ss []
1351            >> RW_TAC std_ss [])
1352   >> POP_ORW
1353   >> ASM_SIMP_TAC std_ss [REAL_SUM_IMAGE_REAL_SUM_IMAGE, FINITE_CROSS, IMAGE_FINITE]
1354   >> `(IMAGE X (p_space p) CROSS IMAGE Z (p_space p) CROSS
1355       IMAGE Y (p_space p)) =
1356        IMAGE (\a. ((FST a, SND(SND a)), FST (SND a)))
1357                (IMAGE X (p_space p) CROSS (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p)))`
1358        by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_IMAGE, IN_CROSS]
1359            >> EQ_TAC
1360            >- (RW_TAC std_ss [] >> Q.EXISTS_TAC `(FST (FST x), (SND x, SND(FST x)))`
1361                >> RW_TAC std_ss [FST,SND] >> METIS_TAC [PAIR])
1362            >> RW_TAC std_ss [FST, SND]
1363            >> RW_TAC std_ss [FST, SND] >> METIS_TAC [])
1364   >> POP_ORW
1365   >> `INJ (\a. ((FST a,SND (SND a)),FST (SND a)))
1366                ((IMAGE X (p_space p) CROSS
1367          (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p))))
1368        (IMAGE (\a. ((FST a,SND (SND a)),FST (SND a))) ((IMAGE X (p_space p) CROSS
1369          (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p)))))`
1370        by (RW_TAC std_ss [INJ_DEF, IN_IMAGE, IN_CROSS] >- METIS_TAC []
1371            >> METIS_TAC [PAIR, PAIR_EQ])
1372   >> RW_TAC std_ss [REAL_SUM_IMAGE_IMAGE, IMAGE_FINITE, FINITE_CROSS, o_DEF]
1373   >> RW_TAC std_ss [GSYM REAL_SUM_IMAGE_ADD, FINITE_CROSS, IMAGE_FINITE]
1374   >> `(IMAGE X (p_space p) CROSS
1375       (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p))) =
1376        (IMAGE X (p_space p) CROSS IMAGE (\x. (Y x,Z x)) (p_space p)) UNION
1377        (((IMAGE X (p_space p) CROSS
1378       (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p)))) DIFF
1379        (IMAGE X (p_space p) CROSS IMAGE (\x. (Y x,Z x)) (p_space p)))`
1380        by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_UNION, IN_DIFF, IN_CROSS, IN_IMAGE] >> METIS_TAC [PAIR_EQ, PAIR])
1381   >> POP_ORW
1382   >> `DISJOINT (IMAGE X (p_space p) CROSS IMAGE (\x. (Y x,Z x)) (p_space p))
1383                ((IMAGE X (p_space p) CROSS
1384        (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p)) DIFF
1385        IMAGE X (p_space p) CROSS IMAGE (\x. (Y x,Z x)) (p_space p)))`
1386        by (RW_TAC std_ss [IN_DISJOINT, IN_DIFF] >> DECIDE_TAC)
1387   >> `SIGMA
1388      (\a.
1389     (\(x,y,z).
1390        joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} *
1391        logr b
1392          (joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} /
1393           (distribution p X {x} *
1394            distribution p (\x. (Y x,Z x)) {(y,z)}))) a +
1395     joint_distribution p X (\x. (Y x,Z x)) {a} *
1396     logr b (distribution p X {FST a}))
1397      (IMAGE X (p_space p) CROSS IMAGE (\x. (Y x,Z x)) (p_space p) UNION
1398       (IMAGE X (p_space p) CROSS
1399        (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p)) DIFF
1400        IMAGE X (p_space p) CROSS IMAGE (\x. (Y x,Z x)) (p_space p))) =
1401        SIGMA (\a.
1402     (\(x,y,z).
1403        joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} *
1404        logr b
1405          (joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} /
1406           (distribution p X {x} *
1407            distribution p (\x. (Y x,Z x)) {(y,z)}))) a +
1408     joint_distribution p X (\x. (Y x,Z x)) {a} *
1409     logr b (distribution p X {FST a})) (IMAGE X (p_space p) CROSS IMAGE (\x. (Y x,Z x)) (p_space p)) +
1410        SIGMA (\a.
1411     (\(x,y,z).
1412        joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} *
1413        logr b
1414          (joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} /
1415           (distribution p X {x} *
1416            distribution p (\x. (Y x,Z x)) {(y,z)}))) a +
1417     joint_distribution p X (\x. (Y x,Z x)) {a} *
1418     logr b (distribution p X {FST a})) ((IMAGE X (p_space p) CROSS
1419        (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p)) DIFF
1420        IMAGE X (p_space p) CROSS IMAGE (\x. (Y x,Z x)) (p_space p)))`
1421        by (MATCH_MP_TAC REAL_SUM_IMAGE_DISJOINT_UNION
1422            >> RW_TAC std_ss [FINITE_DIFF, IMAGE_FINITE, FINITE_CROSS])
1423   >> POP_ORW
1424   >> `FINITE (IMAGE X (p_space p) CROSS
1425        (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p)) DIFF
1426        IMAGE X (p_space p) CROSS IMAGE (\x. (Y x,Z x)) (p_space p))`
1427        by RW_TAC std_ss [FINITE_DIFF, FINITE_CROSS, IMAGE_FINITE]
1428   >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `(IMAGE (X :'a -> 'b)
1429                (p_space
1430                   (p :
1431                     ('a -> bool) #
1432                     (('a -> bool) -> bool) # (('a -> bool) -> real))) CROSS
1433              (IMAGE (Y :'a -> 'c) (p_space p) CROSS
1434               IMAGE (Z :'a -> 'd) (p_space p)) DIFF
1435              IMAGE X (p_space p) CROSS
1436              IMAGE (\(x :'a). (Y x,Z x)) (p_space p))`) REAL_SUM_IMAGE_IN_IF]
1437   >> `(\x.
1438     (if
1439        x IN
1440        IMAGE X (p_space p) CROSS
1441        (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p)) DIFF
1442        IMAGE X (p_space p) CROSS IMAGE (\x. (Y x,Z x)) (p_space p)
1443      then
1444        (\a.
1445           (\(x,y,z).
1446              joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} *
1447              logr b
1448                (joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} /
1449                 (distribution p X {x} *
1450                  distribution p (\x. (Y x,Z x)) {(y,z)}))) a +
1451           joint_distribution p X (\x. (Y x,Z x)) {a} *
1452           logr b (distribution p X {FST a})) x
1453      else
1454        0)) = (\x. 0)`
1455        by (RW_TAC std_ss [FUN_EQ_THM, IN_DIFF, IN_CROSS, IN_IMAGE]
1456            >> (MP_TAC o Q.ISPEC `(x :'b # 'c # 'd)`) pair_CASES
1457            >> STRIP_TAC
1458            >> (MP_TAC o Q.ISPEC `(r :'c # 'd)`) pair_CASES
1459            >> STRIP_TAC
1460            >> RW_TAC std_ss [FST, SND, joint_distribution_def]
1461            >- (`PREIMAGE (\x. (X x,Y x,Z x)) {(X x',Y x'',Z x''')} INTER p_space p = {}`
1462                by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [NOT_IN_EMPTY, IN_INTER, IN_PREIMAGE, IN_SING]
1463                    >> METIS_TAC [])
1464                >> FULL_SIMP_TAC real_ss [random_variable_def, PROB_EMPTY])
1465            >> `PREIMAGE (\x. (X x,Y x,Z x)) {(X x',Y x'',Z x''')} INTER p_space p = {}`
1466                by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [NOT_IN_EMPTY, IN_INTER, IN_PREIMAGE, IN_SING]
1467                    >> METIS_TAC [])
1468            >> FULL_SIMP_TAC real_ss [random_variable_def, PROB_EMPTY])
1469   >> POP_ORW
1470   >> RW_TAC real_ss [REAL_SUM_IMAGE_0, FINITE_CROSS, IMAGE_FINITE, FINITE_DIFF]
1471   >> Suff `(\a.
1472     (\(x,y,z).
1473        joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} *
1474        logr b
1475          (joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} /
1476           (distribution p X {x} *
1477            distribution p (\x. (Y x,Z x)) {(y,z)}))) a +
1478     joint_distribution p X (\x. (Y x,Z x)) {a} *
1479     logr b (distribution p X {FST a})) =
1480        (\(x,y,z).
1481     joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} *
1482     logr b
1483       (joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} /
1484        distribution p (\x. (Y x,Z x)) {(y,z)}))`
1485   >- RW_TAC std_ss []
1486   >> RW_TAC std_ss [FUN_EQ_THM]
1487   >> (MP_TAC o Q.ISPEC `(a :'b # 'c # 'd)`) pair_CASES
1488   >> STRIP_TAC
1489   >> (MP_TAC o Q.ISPEC `(r :'c # 'd)`) pair_CASES
1490   >> STRIP_TAC
1491   >> RW_TAC std_ss [GSYM REAL_ADD_LDISTRIB]
1492   >> Cases_on `distribution p X {q} = 0`
1493   >- (FULL_SIMP_TAC real_ss [joint_distribution_def, distribution_def]
1494       >> `PREIMAGE (\x. (X x,Y x,Z x)) {(q,q',r')} INTER p_space p =
1495           PREIMAGE X {q} INTER PREIMAGE (\x. (Y x,Z x)) {(q',r')} INTER
1496                p_space p`
1497                by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_SING] >> METIS_TAC [])
1498       >> FULL_SIMP_TAC std_ss []
1499       >> `PREIMAGE X {q} INTER PREIMAGE (\x. (Y x,Z x)) {(q',r')} INTER
1500                p_space p = PREIMAGE (\x. (Y x,Z x)) {(q',r')} INTER PREIMAGE X {q} INTER p_space p`
1501                by METIS_TAC [INTER_ASSOC, INTER_COMM]
1502       >> RW_TAC real_ss [])
1503   >> Cases_on `(joint_distribution p X (\x. (Y x,Z x)) {(q,q',r')} /
1504        (distribution p X {q} * distribution p (\x. (Y x,Z x)) {(q',r')})) = 0`
1505   >- (FULL_SIMP_TAC real_ss [joint_distribution_def, distribution_def]
1506       >> `PREIMAGE (\x. (X x,Y x,Z x)) {(q,q',r')} INTER p_space p =
1507           PREIMAGE X {q} INTER PREIMAGE (\x. (Y x,Z x)) {(q',r')} INTER
1508                p_space p`
1509                by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_SING] >> METIS_TAC [])
1510       >> FULL_SIMP_TAC std_ss []
1511       >> Cases_on `prob p (PREIMAGE (\x. (Y x,Z x)) {(q',r')} INTER p_space p) = 0`
1512       >- RW_TAC real_ss []
1513       >> `0 < (prob p (PREIMAGE X {q} INTER p_space p) *
1514            prob p (PREIMAGE (\x. (Y x,Z x)) {(q',r')} INTER p_space p))`
1515                by RW_TAC std_ss [REAL_ENTIRE, REAL_LT_LE, REAL_LE_MUL]
1516       >> FULL_SIMP_TAC real_ss [REAL_EQ_LDIV_EQ])
1517   >> `0 < distribution p X {q}`
1518        by RW_TAC std_ss [distribution_def, REAL_LT_LE]
1519   >> `0 < joint_distribution p X (\x. (Y x,Z x)) {(q,q',r')} /
1520             (distribution p X {q} * distribution p (\x. (Y x,Z x)) {(q',r')})`
1521        by RW_TAC std_ss [REAL_LT_LE, REAL_LE_DIV, REAL_LE_MUL, distribution_def, joint_distribution_def]
1522   >> RW_TAC std_ss [GSYM logr_mul]
1523   >> Suff `joint_distribution p X (\x. (Y x,Z x)) {(q,q',r')} /
1524       (distribution p X {q} * distribution p (\x. (Y x,Z x)) {(q',r')}) * distribution p X {q} =
1525        joint_distribution p X (\x. (Y x,Z x)) {(q,q',r')}/(distribution p (\x. (Y x,Z x)) {(q',r')})`
1526   >- RW_TAC std_ss []
1527   >> RW_TAC std_ss [real_div]
1528   >> Cases_on `distribution p X {q} = 0`
1529   >- FULL_SIMP_TAC real_ss [distribution_def, joint_distribution_def]
1530   >> Cases_on `distribution p (\x. (Y x,Z x)) {(q',r')} = 0`
1531   >- (FULL_SIMP_TAC real_ss [distribution_def, joint_distribution_def]
1532       >> `PREIMAGE (\x. (X x,Y x,Z x)) {(q,q',r')} INTER p_space p =
1533           PREIMAGE X {q} INTER PREIMAGE (\x. (Y x,Z x)) {(q',r')} INTER
1534                p_space p`
1535                by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_SING] >> METIS_TAC [])
1536       >> FULL_SIMP_TAC real_ss [])
1537   >> RW_TAC std_ss [REAL_INV_MUL]
1538   >> `joint_distribution p X (\x. (Y x,Z x)) {(q,q',r')} *
1539    (inv (distribution p X {q}) * inv (distribution p (\x. (Y x,Z x)) {(q',r')})) *
1540    distribution p X {q} =
1541        (joint_distribution p X (\x. (Y x,Z x)) {(q,q',r')} *
1542    inv (distribution p (\x. (Y x,Z x)) {(q',r')})) *
1543        (inv (distribution p X {q}) * distribution p X {q})` by REAL_ARITH_TAC
1544   >> POP_ORW
1545   >> RW_TAC real_ss [REAL_MUL_LINV]);
1546
1547
1548(* ************************************************************************* *)
1549
1550(* -------------Entropy of a RV with a certain event is zero---------------- *)
1551
1552
1553val finite_entropy_certainty_eq_0 = store_thm
1554  ("finite_entropy_certainty_eq_0",
1555   ``!b p X.
1556         (POW (p_space p) = events p) /\
1557         random_variable X p
1558           (IMAGE X (p_space p),POW (IMAGE X (p_space p))) /\
1559         FINITE (p_space p) /\
1560         (?x. x IN IMAGE X (p_space p) /\ (distribution p X {x} = 1)) ==>
1561         (entropy b p (IMAGE X (p_space p),POW (IMAGE X (p_space p))) X = 0)``,
1562   RW_TAC std_ss [finite_entropy_reduce]
1563   >> `FINITE (IMAGE X (p_space p))` by RW_TAC std_ss [IMAGE_FINITE]
1564   >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `IMAGE (X :'a -> 'b) (p_space p)`) REAL_SUM_IMAGE_IN_IF]
1565   >> Suff `(\x.
1566          (if x IN IMAGE X (p_space p) then
1567             (\x. distribution p X {x} * logr b (distribution p X {x})) x
1568           else
1569             0)) = (\x. 0)`
1570   >- RW_TAC real_ss [REAL_SUM_IMAGE_0, IMAGE_FINITE]
1571
1572   >> RW_TAC std_ss [FUN_EQ_THM]
1573   >> RW_TAC std_ss []
1574   >> Cases_on `x' = x`
1575   >- RW_TAC real_ss [logr_1]
1576   >> (MP_TAC o Q.SPECL [`X`,`p`,`x`]) distribution_x_eq_1_imp_distribution_y_eq_0
1577   >> ASM_SIMP_TAC real_ss [GSPECIFICATION]);
1578
1579
1580(* --------------- upper bound on entropy for a rv ------------------------- *)
1581
1582
1583val finite_entropy_le_card = store_thm
1584  ("finite_entropy_le_card",
1585   ``!b p X. 1 <= b /\
1586         (POW (p_space p) = events p) /\
1587         random_variable X p
1588           (IMAGE X (p_space p),POW (IMAGE X (p_space p))) /\
1589         FINITE (p_space p) ==>
1590        entropy b p (IMAGE X (p_space p),POW (IMAGE X (p_space p))) X <=
1591        logr b (& (CARD ((IMAGE X (p_space p)) INTER {x| ~(distribution p X {x} = 0)})))``,
1592   RW_TAC std_ss [finite_entropy_reduce]
1593   >> `!x. ~x = ~1*x` by REAL_ARITH_TAC >> POP_ORW
1594   >> ASM_SIMP_TAC std_ss [GSYM REAL_SUM_IMAGE_CMUL, IMAGE_FINITE]
1595   >> RW_TAC real_ss []
1596   >> ONCE_REWRITE_TAC [GSYM REAL_MUL_RNEG]
1597   >> `FINITE (IMAGE X (p_space p))`
1598        by RW_TAC std_ss [INTER_FINITE, IMAGE_FINITE]
1599   >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `(IMAGE (X :'a -> 'b) (p_space p))`) REAL_SUM_IMAGE_IN_IF]
1600   >> `(\x.
1601         (if x IN IMAGE X (p_space p) then
1602            (\x. distribution p X {x} * ~logr b (distribution p X {x})) x
1603          else
1604            0)) =
1605        (\x.
1606         (if x IN IMAGE X (p_space p) then
1607            (\x. distribution p X {x} * logr b (inv (distribution p X {x}))) x
1608          else
1609            0))`
1610        by (RW_TAC std_ss [FUN_EQ_THM] >> Cases_on `distribution p X {x} = 0` >- RW_TAC real_ss []
1611            >> RW_TAC std_ss [IN_INTER, GSPECIFICATION, REAL_EQ_LMUL]
1612            >> MATCH_MP_TAC (GSYM logr_inv)
1613            >> RW_TAC std_ss [REAL_LT_LE, distribution_def]
1614            >> MATCH_MP_TAC PROB_POSITIVE
1615            >> METIS_TAC [random_variable_def, IN_POW, SUBSET_DEF, IN_INTER])
1616   >> POP_ORW
1617   >> ASM_SIMP_TAC std_ss [GSYM REAL_SUM_IMAGE_IN_IF]
1618   >> Q.ABBREV_TAC `P = \x:'b. distribution p (X :'a -> 'b) {x}`
1619   >> Q.ABBREV_TAC `g = \x:'b. inv (P x)`
1620   >> `(\x. distribution p X {x} * logr b (inv (distribution p X {x}))) =
1621        (\x. P x * logr b (g x))`
1622        by (RW_TAC std_ss [FUN_EQ_THM] >> Q.UNABBREV_TAC `g`
1623            >> Q.UNABBREV_TAC `P` >> RW_TAC std_ss [])
1624   >> POP_ORW
1625   >> MATCH_MP_TAC REAL_LE_TRANS
1626   >> Q.EXISTS_TAC `logr b (SIGMA (\x. P x * g x) (IMAGE X (p_space p)))`
1627   >> CONJ_TAC
1628   >- ((MATCH_MP_TAC o UNDISCH o Q.ISPEC `(IMAGE (X :'a -> 'b) (p_space p))`)
1629                jensen_pos_concave_SIGMA
1630       >> Q.UNABBREV_TAC `g` >> Q.UNABBREV_TAC `P`
1631       >> `prob_space (IMAGE X (p_space p), POW (IMAGE X (p_space p)), distribution p X)`
1632                        by METIS_TAC [distribution_prob_space, space_def, subsets_def]
1633       >> CONJ_TAC
1634       >- ((MP_TAC o GSYM o Q.ISPECL [`(IMAGE (X :'a -> 'b)
1635               (p_space
1636                  (p :
1637                    ('a -> bool) #
1638                    (('a -> bool) -> bool) # (('a -> bool) -> real))),
1639                POW (IMAGE X (p_space p)),distribution p X)`,`IMAGE (X :'a -> 'b) (p_space p)`]) PROB_REAL_SUM_IMAGE
1640           >> RW_TAC std_ss [EVENTS, IN_POW, SUBSET_REFL, PROB, SUBSET_DEF, IN_SING]
1641           >> METIS_TAC [PROB_UNIV, PROB, PSPACE])
1642       >> CONJ_TAC
1643       >- METIS_TAC [PROB_POSITIVE, EVENTS, IN_POW, SUBSET_DEF, IN_SING, PROB_LE_1, PROB]
1644       >> RW_TAC std_ss [REAL_INV_POS, pos_concave_logr])
1645   >> Q.UNABBREV_TAC `g` >> Q.UNABBREV_TAC `P`
1646   >> Q.ABBREV_TAC `foo = logr b (& (CARD (IMAGE X (p_space p) INTER {x | ~(distribution p X {x} = 0)})))`
1647   >> `(IMAGE X (p_space p)) = ((IMAGE X (p_space p)) INTER {x| ~(distribution p X {x} = 0)}) UNION
1648                               ((IMAGE X (p_space p)) DIFF {x| ~(distribution p X {x} = 0)})`
1649        by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_UNION, IN_INTER, IN_DIFF] >> DECIDE_TAC)
1650   >> POP_ORW
1651   >> Q.UNABBREV_TAC `foo`
1652   >> `DISJOINT ((IMAGE X (p_space p)) INTER {x| ~(distribution p X {x} = 0)})
1653                ((IMAGE X (p_space p)) DIFF {x| ~(distribution p X {x} = 0)})`
1654        by (RW_TAC std_ss [IN_DISJOINT, IN_INTER, IN_DIFF] >> DECIDE_TAC)
1655   >> RW_TAC std_ss [REAL_SUM_IMAGE_DISJOINT_UNION, IMAGE_FINITE, INTER_FINITE, FINITE_DIFF]
1656   >> `FINITE (IMAGE X (p_space p) DIFF {x | ~(distribution p X {x} = 0)})`
1657        by RW_TAC std_ss [FINITE_DIFF, IMAGE_FINITE]
1658   >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `(IMAGE (X :'a -> 'b) (p_space p) DIFF
1659                                {x | ~(distribution p (X :'a -> 'b) {x} = (0 :real))})`) REAL_SUM_IMAGE_IN_IF]
1660   >> RW_TAC real_ss [IN_DIFF, GSPECIFICATION, REAL_SUM_IMAGE_0]
1661   >> `FINITE (IMAGE X (p_space p) INTER {x | ~(distribution p X {x} = 0)})`
1662        by RW_TAC std_ss [INTER_FINITE, IMAGE_FINITE]
1663   >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `(IMAGE (X :'a -> 'b) (p_space p) INTER
1664                                {x | ~(distribution p (X :'a -> 'b) {x} = (0 :real))})`) REAL_SUM_IMAGE_IN_IF]
1665   >> `(\x.
1666            (if x IN IMAGE X (p_space p) INTER {x | ~(distribution p X {x} = 0)} then
1667               (\x. distribution p X {x} * inv (distribution p X {x})) x
1668             else
1669               0)) =
1670        (\x.
1671            (if x IN IMAGE X (p_space p) INTER {x | ~(distribution p X {x} = 0)} then
1672               1
1673             else
1674               0))`
1675        by (RW_TAC real_ss [FUN_EQ_THM]
1676            >> RW_TAC std_ss [IN_INTER, GSPECIFICATION]
1677            >> MATCH_MP_TAC REAL_MUL_RINV
1678            >> ASM_REWRITE_TAC [])
1679   >> POP_ORW
1680   >> ASM_SIMP_TAC std_ss [REAL_SUM_IMAGE_EQ_CARD, REAL_LE_REFL]);
1681
1682
1683(* --------------- entropy is maximal for a uniform rv --------------------- *)
1684
1685
1686val finite_entropy_uniform_max = store_thm
1687  ("finite_entropy_uniform_max",
1688   ``!b p X.
1689        (POW (p_space p) = events p) /\
1690         random_variable X p
1691           (IMAGE X (p_space p),POW (IMAGE X (p_space p))) /\
1692         FINITE (p_space p) /\
1693        (!x y. x IN IMAGE X (p_space p) /\ y IN IMAGE X (p_space p) ==>
1694                (distribution p X {x} = distribution p X {y})) ==>
1695        (entropy b p (IMAGE X (p_space p),POW (IMAGE X (p_space p))) X = logr b (& (CARD ((IMAGE X (p_space p))))))``,
1696   RW_TAC std_ss [finite_entropy_reduce]
1697   >> `!x. ~x = ~1*x` by REAL_ARITH_TAC >> POP_ORW
1698   >> ASM_SIMP_TAC std_ss [GSYM REAL_SUM_IMAGE_CMUL, IMAGE_FINITE]
1699   >> RW_TAC real_ss []
1700   >> ONCE_REWRITE_TAC [GSYM REAL_MUL_RNEG]
1701   >> `FINITE (IMAGE X (p_space p))` by RW_TAC std_ss [IMAGE_FINITE]
1702
1703   >> (MP_TAC o Q.SPEC `(\x. distribution p X {x})` o UNDISCH o
1704        Q.ISPEC `IMAGE (X :'a -> 'b) (p_space p)`)
1705        REAL_SUM_IMAGE_CONST_EQ_1_EQ_INV_CARD
1706   >> `prob_space (IMAGE X (p_space p), POW (IMAGE X (p_space p)), distribution p X)`
1707                        by METIS_TAC [distribution_prob_space, space_def, subsets_def]
1708   >> `(SIGMA (\x. distribution p X {x}) (IMAGE X (p_space p)) = 1)`
1709        by ((MP_TAC o GSYM o Q.ISPECL [`(IMAGE (X :'a -> 'b)
1710               (p_space
1711                  (p :
1712                    ('a -> bool) #
1713                    (('a -> bool) -> bool) # (('a -> bool) -> real))),
1714                POW (IMAGE X (p_space p)),distribution p X)`,`IMAGE (X :'a -> 'b) (p_space p)`]) PROB_REAL_SUM_IMAGE
1715           >> RW_TAC std_ss [EVENTS, IN_POW, SUBSET_REFL, PROB, SUBSET_DEF, IN_SING]
1716           >> METIS_TAC [PROB_UNIV, PROB, PSPACE])
1717   >> ASM_SIMP_TAC std_ss []
1718   >> STRIP_TAC
1719   >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `(IMAGE (X :'a -> 'b) (p_space p))`) REAL_SUM_IMAGE_IN_IF]
1720   >> Q.ABBREV_TAC `foo = (& (CARD (IMAGE (X :'a -> 'b) (p_space p))))`
1721   >> `(\x.
1722         (if x IN IMAGE X (p_space p) then
1723            (\x. distribution p X {x} * ~logr b (distribution p X {x})) x
1724          else
1725            0)) =
1726        (\x.
1727         (if x IN IMAGE X (p_space p) then
1728            (\x. inv foo * ~logr b (inv foo)) x
1729          else
1730            0))`
1731        by METIS_TAC []
1732   >> POP_ORW
1733   >> ASM_SIMP_TAC std_ss [GSYM REAL_SUM_IMAGE_IN_IF]
1734   >> (MP_TAC o Q.SPECL [`(\x. inv foo * ~ logr b (inv foo))`,`inv foo * ~ logr b (inv foo)`] o UNDISCH o
1735        Q.ISPEC `(IMAGE (X :'a -> 'b) (p_space p))`) REAL_SUM_IMAGE_FINITE_CONST
1736   >> RW_TAC std_ss []
1737   >> POP_ASSUM (K ALL_TAC)
1738   >> `0 < foo`
1739        by (Q.UNABBREV_TAC `foo` >> MATCH_MP_TAC REAL_NZ_IMP_LT
1740            >> RW_TAC std_ss [(UNDISCH o Q.ISPEC `(IMAGE (X :'a -> 'b) (p_space p))`) CARD_EQ_0]
1741            >> SPOSE_NOT_THEN STRIP_ASSUME_TAC >> FULL_SIMP_TAC real_ss [REAL_SUM_IMAGE_THM])
1742   >> `~logr b (inv foo) = logr b (inv (inv foo))`
1743        by (MATCH_MP_TAC neg_logr >> RW_TAC std_ss [REAL_LT_INV_EQ])
1744   >> POP_ORW
1745   >> RW_TAC real_ss [REAL_INVINV, REAL_LT_IMP_NE, REAL_MUL_ASSOC, REAL_MUL_RINV]);
1746
1747val _ = export_theory ();
1748