1(* ========================================================================= *) 2(* Create "informationTheory" setting up the theory of information *) 3(* ========================================================================= *) 4 5(* ------------------------------------------------------------------------- *) 6(* Load and open relevant theories *) 7(* (Comment out "load" and "loadPath"s for compilation) *) 8(* ------------------------------------------------------------------------- *) 9(* 10 11app load ["bossLib", "metisLib", "arithmeticTheory", "pred_setTheory", "listTheory", 12 "state_transformerTheory", "probabilityTheory", "formalizeUseful", 13 "combinTheory", "pairTheory", "realTheory", "realLib", "extra_boolTheory", "jrhUtils", 14 "extra_pred_setTheory", "realSimps", "extra_realTheory", 15 "measureTheory", "numTheory", "simpLib", 16 "seqTheory", "subtypeTheory", 17 "transcTheory", "limTheory", "stringTheory", "rich_listTheory", "stringSimps", 18 "listSimps", "lebesgueTheory", "borelTheory"]; 19 20*) 21 22open HolKernel Parse boolLib bossLib metisLib arithmeticTheory pred_setTheory 23 listTheory state_transformerTheory 24 probabilityTheory formalizeUseful extra_numTheory combinTheory 25 pairTheory realTheory realLib extra_boolTheory jrhUtils 26 extra_pred_setTheory realSimps extra_realTheory measureTheory numTheory 27 simpLib seqTheory subtypeTheory 28 transcTheory limTheory stringTheory rich_listTheory stringSimps listSimps 29 lebesgueTheory borelTheory; 30 31open real_sigmaTheory; 32 33(* ------------------------------------------------------------------------- *) 34(* Start a new theory called "information" *) 35(* ------------------------------------------------------------------------- *) 36 37val _ = new_theory "information"; 38 39(* ------------------------------------------------------------------------- *) 40(* Helpful proof tools *) 41(* ------------------------------------------------------------------------- *) 42 43val REVERSE = Tactical.REVERSE; 44val Simplify = RW_TAC arith_ss; 45val Suff = PARSE_TAC SUFF_TAC; 46val Know = PARSE_TAC KNOW_TAC; 47val Rewr = DISCH_THEN (REWRITE_TAC o wrap); 48val Rewr' = DISCH_THEN (ONCE_REWRITE_TAC o wrap); 49val Cond = 50 DISCH_THEN 51 (fn mp_th => 52 let 53 val cond = fst (dest_imp (concl mp_th)) 54 in 55 KNOW_TAC cond >| [ALL_TAC, DISCH_THEN (MP_TAC o MP mp_th)] 56 end); 57 58val POP_ORW = POP_ASSUM (fn thm => ONCE_REWRITE_TAC [thm]); 59 60val safe_list_ss = bool_ss ++ LIST_ss; 61val safe_string_ss = bool_ss ++ STRING_ss; 62val arith_string_ss = arith_ss ++ STRING_ss; 63 64(* ************************************************************************* *) 65(* ************************************************************************* *) 66(* Basic Definitions *) 67(* ************************************************************************* *) 68(* ************************************************************************* *) 69 70val KL_divergence_def = Define 71 `KL_divergence b s u v = 72 integral (space s, subsets s, u) (\x. logr b ((RN_deriv (space s, subsets s, v) u) x))`; 73 74 75val mutual_information_def = Define 76 `mutual_information b p s1 s2 X Y = 77 let prod_space = 78 prod_measure_space (space s1, subsets s1, distribution p X) 79 (space s2, subsets s2, distribution p Y) in 80 KL_divergence b 81 (p_space prod_space, events prod_space) 82 (joint_distribution p X Y) 83 (prob prod_space)`; 84 85val entropy_def = Define 86 `entropy b p s X = mutual_information b p s s X X`; 87 88 89val conditional_mutual_information_def = Define 90 `conditional_mutual_information b p s1 s2 s3 X Y Z = 91 let prod_space = 92 prod_measure_space (space s2, subsets s2, distribution p Y) 93 (space s3, subsets s3, distribution p Z) in 94 mutual_information b p s1 (p_space prod_space, events prod_space) X (\x. (Y x, Z x)) - 95 mutual_information b p s1 s3 X Z`; 96 97 98(* ************************************************************************* *) 99(* ************************************************************************* *) 100(* Proofs *) 101(* ************************************************************************* *) 102(* ************************************************************************* *) 103 104(* ************************************************************************* *) 105(* Reductions *) 106(* ************************************************************************* *) 107 108 109val finite_mutual_information_reduce = store_thm 110 ("finite_mutual_information_reduce", 111 ``!b p s1 s2 X Y. (POW (p_space p) = events p) /\ 112 random_variable X p (IMAGE X (p_space p), POW (IMAGE X (p_space p))) /\ 113 random_variable Y p (IMAGE Y (p_space p), POW (IMAGE Y (p_space p))) /\ 114 FINITE (p_space p) ==> 115 (mutual_information b p (IMAGE X (p_space p), POW (IMAGE X (p_space p))) 116 (IMAGE Y (p_space p), POW (IMAGE Y (p_space p))) X Y = 117 SIGMA (\(x,y). (joint_distribution p X Y {(x,y)}) * 118 logr b ((joint_distribution p X Y {(x,y)})/ 119 ((distribution p X {x})*(distribution p Y {y})))) 120 ((IMAGE X (p_space p)) CROSS (IMAGE Y (p_space p))))``, 121 RW_TAC std_ss [mutual_information_def, KL_divergence_def, space_def, subsets_def] 122 >> `FINITE (IMAGE X (p_space p)) /\ FINITE (IMAGE Y (p_space p))` 123 by RW_TAC std_ss [IMAGE_FINITE] 124 >> Q.ABBREV_TAC `s1 = IMAGE X (p_space p)` 125 >> Q.ABBREV_TAC `s2 = IMAGE Y (p_space p)` 126 >> Know `prod_space = (s1 CROSS s2, POW (s1 CROSS s2), prod_measure (s1, POW s1, distribution p X) 127 (s2, POW s2, distribution p Y))` 128 >- ( Q.UNABBREV_TAC `prod_space` 129 >> RW_TAC std_ss [prod_measure_space_def, m_space_def, subsets_def, EXTENSION, subsets_def, 130 sigma_def, prod_sets_def, IN_POW, IN_BIGINTER, measurable_sets_def, SUBSET_DEF, 131 IN_CROSS, GSPECIFICATION] 132 >> RW_TAC std_ss [GSYM EXTENSION] 133 >> EQ_TAC 134 >- (RW_TAC std_ss [] >> Q.PAT_X_ASSUM `!s. P ==> x IN s` (MP_TAC o Q.SPEC `POW (s1 CROSS s2)`) 135 >> RW_TAC std_ss [IN_POW, SUBSET_DEF, IN_CROSS, POW_SIGMA_ALGEBRA] 136 >> Suff `(!x''. (?x'. (x'',T) = (\(s,t). (s CROSS t, 137 (!x. x IN s ==> x IN s1) /\ !x. x IN t ==> x IN s2)) 138 x') ==> !x. x IN x'' ==> FST x IN s1 /\ SND x IN s2)` >- METIS_TAC [] 139 >> RW_TAC std_ss [] >> (MP_TAC o Q.ISPEC `(x''' :('d -> bool) # ('e -> bool))`) pair_CASES 140 >> STRIP_TAC >> FULL_SIMP_TAC std_ss [] >> METIS_TAC [IN_CROSS] ) 141 >> RW_TAC std_ss [] 142 >> `x = BIGUNION (IMAGE (\a. {a}) x)` 143 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_BIGUNION_IMAGE, IN_SING]) 144 >> POP_ORW 145 >> FULL_SIMP_TAC std_ss [SIGMA_ALGEBRA, subsets_def] 146 >> POP_ASSUM MATCH_MP_TAC 147 >> CONJ_TAC 148 >- (MATCH_MP_TAC FINITE_COUNTABLE >> MATCH_MP_TAC IMAGE_FINITE 149 >> (MP_TAC o Q.ISPEC `(s1 :'d -> bool) CROSS (s2 :'e -> bool)`) SUBSET_FINITE 150 >> RW_TAC std_ss [FINITE_CROSS] 151 >> POP_ASSUM MATCH_MP_TAC 152 >> METIS_TAC [SUBSET_DEF, IN_CROSS]) 153 >> RW_TAC std_ss [SUBSET_DEF, IN_IMAGE, IN_SING] 154 >> Q.PAT_X_ASSUM `!x. Q ==> x IN P` MATCH_MP_TAC 155 >> Q.EXISTS_TAC `({FST a}, {SND a})` >> RW_TAC std_ss [PAIR_EQ, IN_SING] 156 >> ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_CROSS, IN_SING] 157 >> METIS_TAC [PAIR_EQ, PAIR, FST, SND] ) 158 >> DISCH_TAC 159 >> POP_ORW 160 >> RW_TAC std_ss [PSPACE, EVENTS] 161 >> (MP_TAC o Q.SPECL [`p`, `s1`, `s2`, `X`, `Y`] o 162 INST_TYPE [``:'c`` |-> ``:'e``, ``:'b``|->``:'d``]) finite_marginal_product_space_POW2 163 >> ASM_SIMP_TAC std_ss [] 164 >> STRIP_TAC 165 >> (MP_TAC o Q.SPEC `(s1 CROSS s2,POW (s1 CROSS s2),joint_distribution p X Y)` 166 o INST_TYPE [``:'a`` |-> ``:'d # 'e``]) finite_space_POW_integral_reduce 167 >> ASM_SIMP_TAC std_ss [m_space_def, measurable_sets_def, FINITE_CROSS] 168 >> STRIP_TAC >> POP_ASSUM (K ALL_TAC) 169 >> `FINITE (s1 CROSS s2)` 170 by RW_TAC std_ss [FINITE_CROSS] 171 >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `(s1 :'d -> bool) 172 CROSS (s2 :'e -> bool)`) REAL_SUM_IMAGE_IN_IF] 173 >> Suff `(\x. 174 (if x IN s1 CROSS s2 then 175 (\x. 176 logr b 177 (RN_deriv 178 (s1 CROSS s2,POW (s1 CROSS s2), 179 prob 180 (s1 CROSS s2,POW (s1 CROSS s2), 181 prod_measure (s1,POW s1,distribution p X) 182 (s2,POW s2,distribution p Y))) 183 (joint_distribution p X Y) x) * 184 measure 185 (s1 CROSS s2,POW (s1 CROSS s2),joint_distribution p X Y) 186 {x}) x 187 else 188 0)) = 189 (\x. 190 (if x IN s1 CROSS s2 then 191 (\(x,y). 192 joint_distribution p X Y {(x,y)} * 193 logr b 194 (joint_distribution p X Y {(x,y)} / 195 (distribution p X {x} * distribution p Y {y}))) x 196 else 197 0))` 198 >- RW_TAC std_ss [] 199 >> RW_TAC std_ss [FUN_EQ_THM] 200 >> RW_TAC std_ss [PROB, measure_def] 201 >> `x = (FST x, SND x)` by RW_TAC std_ss [PAIR] 202 >> POP_ORW 203 >> RW_TAC std_ss [] 204 >> Cases_on `joint_distribution p X Y {x} = 0` 205 >- RW_TAC real_ss [] 206 >> RW_TAC std_ss [REAL_MUL_COMM, REAL_EQ_LMUL] 207 >> Suff `(RN_deriv 208 (s1 CROSS s2,POW (s1 CROSS s2), 209 prod_measure (s1,POW s1,distribution p X) 210 (s2,POW s2,distribution p Y)) (joint_distribution p X Y) x) = 211 (joint_distribution p X Y {x} / 212 (distribution p X {FST x} * distribution p Y {SND x}))` 213 >- RW_TAC std_ss [] 214 >> RW_TAC std_ss [RN_deriv_def] 215 >> Suff `(\f. f x = joint_distribution p X Y {x} / 216 (distribution p X {FST x} * distribution p Y {SND x})) 217 (@f. 218 measure_space 219 (s1 CROSS s2,POW (s1 CROSS s2), 220 prod_measure (s1,POW s1,distribution p X) 221 (s2,POW s2,distribution p Y)) /\ 222 measure_space 223 (m_space 224 (s1 CROSS s2,POW (s1 CROSS s2), 225 prod_measure (s1,POW s1,distribution p X) 226 (s2,POW s2,distribution p Y)), 227 measurable_sets 228 (s1 CROSS s2,POW (s1 CROSS s2), 229 prod_measure (s1,POW s1,distribution p X) 230 (s2,POW s2,distribution p Y)),joint_distribution p X Y) /\ 231 f IN 232 borel_measurable 233 (m_space 234 (s1 CROSS s2,POW (s1 CROSS s2), 235 prod_measure (s1,POW s1,distribution p X) 236 (s2,POW s2,distribution p Y)), 237 measurable_sets 238 (s1 CROSS s2,POW (s1 CROSS s2), 239 prod_measure (s1,POW s1,distribution p X) 240 (s2,POW s2,distribution p Y))) /\ 241 !a. 242 a IN 243 measurable_sets 244 (s1 CROSS s2,POW (s1 CROSS s2), 245 prod_measure (s1,POW s1,distribution p X) 246 (s2,POW s2,distribution p Y)) ==> 247 (integral 248 (s1 CROSS s2,POW (s1 CROSS s2), 249 prod_measure (s1,POW s1,distribution p X) 250 (s2,POW s2,distribution p Y)) (\x. f x * indicator_fn a x) = 251 joint_distribution p X Y a))` 252 >- RW_TAC std_ss [] 253 >> MATCH_MP_TAC SELECT_ELIM_THM 254 >> `measure_space (s1 CROSS s2, POW (s1 CROSS s2), 255 prod_measure 256 (s1,POW s1,distribution p X) 257 (s2,POW s2,distribution p Y))` 258 by METIS_TAC [measurable_sets_def, distribution_prob_space, space_def, subsets_def, m_space_def, 259 prob_space_def, measure_space_finite_prod_measure_POW2] 260 >> RW_TAC std_ss [measurable_sets_def, m_space_def] 261 >- (Q.EXISTS_TAC `\x'. joint_distribution p X Y {x'} / 262 ((distribution p X {FST x'})*(distribution p Y {SND x'}))` 263 >> STRONG_CONJ_TAC 264 >- (`(s1 CROSS s2,POW (s1 CROSS s2)) = 265 (m_space (s1 CROSS s2,POW (s1 CROSS s2), 266 prod_measure (s1,POW s1,distribution p X) 267 (s2,POW s2,distribution p Y)), 268 measurable_sets (s1 CROSS s2,POW (s1 CROSS s2), 269 prod_measure (s1,POW s1,distribution p X) 270 (s2,POW s2,distribution p Y)))` by RW_TAC std_ss [measurable_sets_def, m_space_def] 271 >> POP_ORW 272 >> ASM_SIMP_TAC std_ss [borel_measurable_le_iff, IN_POW, SUBSET_DEF, GSPECIFICATION, 273 measurable_sets_def, m_space_def]) 274 >> RW_TAC std_ss [IN_POW] 275 >> (MP_TAC o Q.SPEC `(s1 CROSS s2,POW (s1 CROSS s2),prod_measure (s1,POW s1,distribution p X) 276 (s2,POW s2,distribution p Y))` 277 o INST_TYPE [``:'a`` |-> ``:'d # 'e``]) finite_space_POW_integral_reduce 278 >> ASM_SIMP_TAC std_ss [m_space_def, measurable_sets_def, FINITE_CROSS, measure_def] 279 >> STRIP_TAC >> POP_ASSUM (K ALL_TAC) 280 >> `(s1 CROSS s2) = a UNION ((s1 CROSS s2) DIFF a)` 281 by (ONCE_REWRITE_TAC [EXTENSION] 282 >> RW_TAC std_ss [IN_UNION, IN_DIFF] >> METIS_TAC [SUBSET_DEF]) 283 >> POP_ORW 284 >> `DISJOINT a ((s1 CROSS s2) DIFF a)` 285 by (RW_TAC std_ss [IN_DISJOINT, IN_DIFF] >> DECIDE_TAC) 286 >> `FINITE a` by METIS_TAC [SUBSET_FINITE, FINITE_CROSS, IMAGE_FINITE] 287 >> RW_TAC std_ss [FINITE_DIFF, REAL_SUM_IMAGE_DISJOINT_UNION] 288 >> `FINITE ((s1 CROSS s2) DIFF a)` 289 by RW_TAC std_ss [FINITE_DIFF] 290 >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `((s1 :'d -> bool) CROSS (s2 :'e -> bool)) DIFF a`) 291 REAL_SUM_IMAGE_IN_IF] 292 >> `(\x. (if x IN s1 CROSS s2 DIFF a then 293 (\x'. 294 joint_distribution p X Y {x'} / 295 (distribution p X {FST x'} * distribution p Y {SND x'}) * indicator_fn a x' * 296 prod_measure (s1,POW s1,distribution p X) 297 (s2,POW s2,distribution p Y) {x'}) x else 0)) = (\x. 0)` 298 by (RW_TAC std_ss [FUN_EQ_THM] >> RW_TAC real_ss [IN_DIFF, indicator_fn_def]) 299 >> RW_TAC real_ss [REAL_SUM_IMAGE_0] 300 >> POP_ASSUM (K ALL_TAC) 301 >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `(a :'d # 'e -> bool)`) REAL_SUM_IMAGE_IN_IF] 302 >> Know `(\x. (if x IN a then (\x'. 303 joint_distribution p X Y {x'} / 304 (distribution p X {FST x'} * distribution p Y {SND x'}) * 305 indicator_fn a x' * 306 prod_measure (s1,POW s1,distribution p X) 307 (s2,POW s2,distribution p Y) {x'}) x else 0)) = 308 (\x. if x IN a then 309 (\x. joint_distribution p X Y {x}) x else 0)` 310 311 >- ( RW_TAC std_ss [FUN_EQ_THM] >> RW_TAC real_ss [indicator_fn_def] 312 >> `{x'} = {FST x'} CROSS {SND x'}` 313 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_SING, IN_CROSS] 314 >> (MP_TAC o Q.ISPEC `(x'' :'d # 'e)`) pair_CASES 315 >> (MP_TAC o Q.ISPEC `(x' :'d # 'e)`) pair_CASES 316 >> RW_TAC std_ss []) 317 >> POP_ORW 318 >> `prod_measure (s1,POW s1,distribution p X) (s2,POW s2,distribution p Y) 319 ({FST x'} CROSS {SND x'}) = 320 measure (s1,POW s1,distribution p X) {FST x'} * 321 measure (s2,POW s2,distribution p Y) {SND x'}` 322 by (`measure_space (s1,POW s1,distribution p X) /\ 323 measure_space (s2,POW s2,distribution p Y)` 324 by METIS_TAC [distribution_prob_space, prob_space_def, space_def, subsets_def] 325 >> `{FST x'} IN measurable_sets (s1,POW s1,distribution p X) /\ 326 {SND x'} IN measurable_sets (s2,POW s2,distribution p Y)` 327 by (RW_TAC std_ss [measurable_sets_def, IN_POW, SUBSET_DEF, IN_SING] 328 >> FULL_SIMP_TAC std_ss [SUBSET_DEF, IN_CROSS]) 329 >> METIS_TAC [finite_POW_prod_measure_reduce, m_space_def, measurable_sets_def]) 330 >> POP_ORW 331 >> RW_TAC std_ss [measure_def] 332 >> Cases_on `(distribution p X {FST x'} * distribution p Y {SND x'}) = 0` 333 >- (Suff `joint_distribution p X Y ({FST x'} CROSS {SND x'}) = 0` 334 >- RW_TAC real_ss [] 335 >> `PREIMAGE X {FST x'} INTER p_space p IN events p` 336 by (Q.UNABBREV_TAC `s1` 337 >> FULL_SIMP_TAC std_ss [random_variable_def, IN_MEASURABLE, space_def, 338 subsets_def, IN_POW] 339 >> Suff `{FST x'} SUBSET IMAGE X (p_space p)` >- METIS_TAC [] 340 >> FULL_SIMP_TAC std_ss [SUBSET_DEF, IN_SING, IN_CROSS]) 341 >> `PREIMAGE Y {SND x'} INTER p_space p IN events p` 342 by (Q.UNABBREV_TAC `s2` 343 >> FULL_SIMP_TAC std_ss [random_variable_def, IN_MEASURABLE, space_def, 344 subsets_def, IN_POW] 345 >> Suff `{SND x'} SUBSET IMAGE Y (p_space p)` >- METIS_TAC [] 346 >> FULL_SIMP_TAC std_ss [SUBSET_DEF, IN_SING, IN_CROSS]) 347 >> `(PREIMAGE X {FST x'} INTER PREIMAGE Y {SND x'} INTER p_space p) IN events p` 348 by (`PREIMAGE X {FST x'} INTER PREIMAGE Y {SND x'} INTER p_space p = 349 (PREIMAGE X {FST x'} INTER p_space p) INTER 350 (PREIMAGE Y {SND x'} INTER p_space p)` 351 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER] >> DECIDE_TAC) 352 >> POP_ORW 353 >> FULL_SIMP_TAC std_ss [random_variable_def, prob_space_def, measure_space_def, 354 events_def] 355 >> METIS_TAC [sigma_algebra_def, ALGEBRA_INTER, space_def, subsets_def]) 356 >> FULL_SIMP_TAC std_ss [REAL_ENTIRE, distribution_def, joint_distribution_def] 357 >- (`(PREIMAGE (\x. (X x,Y x)) ({FST x'} CROSS {SND x'}) INTER p_space p) = 358 (PREIMAGE X {FST x'} INTER PREIMAGE Y {SND x'} INTER p_space p)` 359 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_CROSS, IN_SING]) 360 >> POP_ORW 361 >> RW_TAC std_ss [GSYM REAL_LE_ANTISYM] 362 >- (Suff `(PREIMAGE X {FST x'} INTER PREIMAGE Y {SND x'} INTER p_space p) SUBSET 363 (PREIMAGE X {FST x'} INTER p_space p)` 364 >- METIS_TAC [REAL_LE_TRANS, REAL_LE_REFL, PROB_INCREASING, random_variable_def] 365 >> RW_TAC std_ss [SUBSET_DEF, IN_INTER]) 366 >> METIS_TAC [random_variable_def, PROB_POSITIVE]) 367 >> `(PREIMAGE (\x. (X x,Y x)) ({FST x'} CROSS {SND x'}) INTER p_space p) = 368 (PREIMAGE X {FST x'} INTER PREIMAGE Y {SND x'} INTER p_space p)` 369 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_CROSS, IN_SING]) 370 >> POP_ORW 371 >> RW_TAC std_ss [GSYM REAL_LE_ANTISYM] 372 >- (Suff `(PREIMAGE X {FST x'} INTER PREIMAGE Y {SND x'} INTER p_space p) SUBSET 373 (PREIMAGE Y {SND x'} INTER p_space p)` 374 >- METIS_TAC [REAL_LE_TRANS, REAL_LE_REFL, PROB_INCREASING, random_variable_def] 375 >> RW_TAC std_ss [SUBSET_DEF, IN_INTER]) 376 >> METIS_TAC [random_variable_def, PROB_POSITIVE]) 377 >> RW_TAC std_ss [real_div, measure_def] 378 >> `joint_distribution p X Y ({FST x'} CROSS {SND x'}) * 379 inv (distribution p X {FST x'} * distribution p Y {SND x'}) * 380 (distribution p X {FST x'} * distribution p Y {SND x'}) = 381 (inv (distribution p X {FST x'} * distribution p Y {SND x'}) * 382 (distribution p X {FST x'} * distribution p Y {SND x'})) * 383 joint_distribution p X Y ({FST x'} CROSS {SND x'})` 384 by REAL_ARITH_TAC 385 >> POP_ORW 386 >> RW_TAC real_ss [REAL_MUL_LINV]) 387 >> DISCH_TAC 388 >> POP_ORW 389 >> ASM_SIMP_TAC std_ss [GSYM REAL_SUM_IMAGE_IN_IF] 390 >> Q.PAT_X_ASSUM `a SUBSET s1 CROSS s2` MP_TAC 391 >> CONV_TAC (UNBETA_CONV ``(a :'d # 'e -> bool)``) 392 >> Q.PAT_X_ASSUM `FINITE a` MP_TAC 393 >> Q.SPEC_TAC (`a`,`a`) 394 >> MATCH_MP_TAC FINITE_INDUCT 395 >> FULL_SIMP_TAC std_ss [random_variable_def] 396 >> RW_TAC std_ss [REAL_SUM_IMAGE_THM, PREIMAGE_EMPTY, INTER_EMPTY, PROB_EMPTY, DELETE_NON_ELEMENT, 397 joint_distribution_def] 398 >> `(PREIMAGE (\x. (X x,Y x)) (e INSERT s) INTER p_space p) = 399 (PREIMAGE (\x. (X x,Y x)) {e} INTER p_space p) UNION 400 (PREIMAGE (\x. (X x,Y x)) s INTER p_space p)` 401 by (ONCE_REWRITE_TAC [EXTENSION] 402 >> RW_TAC std_ss [IN_UNION, IN_INTER, IN_PREIMAGE, IN_INSERT, NOT_IN_EMPTY] 403 >> FULL_SIMP_TAC std_ss [GSYM DELETE_NON_ELEMENT, SUBSET_DEF, IN_INSERT, IN_CROSS, IN_IMAGE] 404 >> METIS_TAC [FST, SND]) 405 >> POP_ORW 406 >> `DISJOINT (PREIMAGE (\x. (X x,Y x)) {e} INTER p_space p) 407 (PREIMAGE (\x. (X x,Y x)) s INTER p_space p)` 408 by (RW_TAC std_ss [IN_DISJOINT, IN_INTER, IN_PREIMAGE, IN_SING] 409 >> FULL_SIMP_TAC std_ss [GSYM DELETE_NON_ELEMENT] 410 >> METIS_TAC []) 411 >> `prob p (PREIMAGE (\x. (X x,Y x)) {e} INTER p_space p UNION 412 PREIMAGE (\x. (X x,Y x)) s INTER p_space p) = 413 prob p (PREIMAGE (\x. (X x,Y x)) {e} INTER p_space p) + 414 prob p (PREIMAGE (\x. (X x,Y x)) s INTER p_space p)` 415 by (MATCH_MP_TAC PROB_ADDITIVE 416 >> RW_TAC std_ss [] 417 >> METIS_TAC [SUBSET_DEF, IN_INTER, IN_POW]) 418 >> POP_ORW 419 >> FULL_SIMP_TAC std_ss [SUBSET_DEF, IN_INSERT]) 420 >> POP_ASSUM (MP_TAC o Q.SPEC `{x}`) 421 >> RW_TAC std_ss [IN_POW, SUBSET_DEF, IN_SING, finite_space_POW_integral_reduce, 422 FINITE_CROSS, IMAGE_FINITE, measurable_sets_def, m_space_def, 423 measure_def] 424 >> POP_ASSUM MP_TAC 425 >> `(s1 CROSS s2) = 426 x INSERT ((s1 CROSS s2) DELETE x)` 427 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INSERT, IN_DELETE] >> METIS_TAC []) 428 >> POP_ORW 429 >> ASM_SIMP_TAC std_ss [REAL_SUM_IMAGE_THM, FINITE_CROSS, FINITE_DELETE, DELETE_DELETE] 430 >> `FINITE (s1 CROSS s2 DELETE x)` 431 by RW_TAC std_ss [FINITE_DELETE, FINITE_CROSS] 432 >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `((s1 :'d -> bool) CROSS 433 (s2 :'e -> bool) DELETE (x :'d # 'e))`) REAL_SUM_IMAGE_IN_IF] 434 >> `(\x''. 435 (if x'' IN s1 CROSS s2 DELETE x then 436 (\x''. 437 x' x'' * indicator_fn {x} x'' * 438 prod_measure (s1,POW s1,distribution p X) 439 (s2,POW s2,distribution p Y) {x''}) x'' 440 else 441 0)) = (\x'. 0)` 442 by (RW_TAC std_ss [FUN_EQ_THM] >> RW_TAC real_ss [indicator_fn_def, IN_SING, IN_DELETE]) 443 >> POP_ORW 444 >> ASM_SIMP_TAC real_ss [REAL_SUM_IMAGE_0, indicator_fn_def, IN_SING] 445 >> `{(x :'d # 'e)} = {FST x} CROSS {SND x}` 446 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_SING, IN_CROSS] 447 >> (MP_TAC o Q.ISPEC `(x'' :'d # 'e)`) pair_CASES 448 >> (MP_TAC o Q.ISPEC `(x :'d # 'e)`) pair_CASES 449 >> RW_TAC std_ss []) 450 >> FULL_SIMP_TAC std_ss [] >> POP_ASSUM (K ALL_TAC) 451 >> `prod_measure (s1,POW s1,distribution p X) (s2,POW s2,distribution p Y) 452 ({FST x} CROSS {SND x}) = 453 measure (s1,POW s1,distribution p X) {FST x} * 454 measure (s2,POW s2,distribution p Y) {SND x}` 455 by (`measure_space (s1,POW s1,distribution p X) /\ 456 measure_space (s2,POW s2,distribution p Y)` 457 by METIS_TAC [distribution_prob_space, prob_space_def, space_def, subsets_def] 458 >> `{FST x} IN measurable_sets (s1,POW s1,distribution p X) /\ 459 {SND x} IN measurable_sets (s2,POW s2,distribution p Y)` 460 by (RW_TAC std_ss [measurable_sets_def, IN_POW, SUBSET_DEF, IN_SING] 461 >> FULL_SIMP_TAC std_ss [SUBSET_DEF, IN_CROSS]) 462 >> METIS_TAC [finite_POW_prod_measure_reduce, m_space_def, measurable_sets_def]) 463 >> POP_ORW 464 >> `x = (FST x, SND x)` by RW_TAC std_ss [PAIR] 465 >> POP_ORW 466 >> FULL_SIMP_TAC std_ss [measure_def, joint_distribution_def, distribution_def] 467 >> `PREIMAGE (\x. (X x,Y x)) ({FST x} CROSS {SND x}) INTER 468 p_space p = 469 PREIMAGE X {FST x} INTER PREIMAGE Y {SND x} INTER p_space p` 470 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_SING, IN_CROSS]) 471 >> FULL_SIMP_TAC std_ss [] >> POP_ASSUM (K ALL_TAC) 472 >> POP_ORW 473 >> Suff `0 < (prob p (PREIMAGE X {FST x} INTER p_space p) * 474 prob p (PREIMAGE Y {SND x} INTER p_space p))` 475 >- RW_TAC real_ss [REAL_EQ_RDIV_EQ] 476 >> MATCH_MP_TAC REAL_LT_MUL 477 >> SIMP_TAC std_ss [REAL_LT_LE] 478 >> `PREIMAGE X {FST x} INTER p_space p IN events p` 479 by (Q.UNABBREV_TAC `s1` 480 >> FULL_SIMP_TAC std_ss [random_variable_def, IN_MEASURABLE, space_def, subsets_def, 481 IN_POW] 482 >> Suff `{FST x} SUBSET IMAGE X (p_space p)` >- METIS_TAC [] 483 >> FULL_SIMP_TAC std_ss [SUBSET_DEF, IN_SING, IN_CROSS]) 484 >> `PREIMAGE Y {SND x} INTER p_space p IN events p` 485 by (Q.UNABBREV_TAC `s2` 486 >> FULL_SIMP_TAC std_ss [random_variable_def, IN_MEASURABLE, space_def, subsets_def, 487 IN_POW] 488 >> Suff `{SND x} SUBSET IMAGE Y (p_space p)` >- METIS_TAC [] 489 >> FULL_SIMP_TAC std_ss [SUBSET_DEF, IN_SING, IN_CROSS]) 490 >> `PREIMAGE X {FST x} INTER PREIMAGE Y {SND x} INTER p_space p IN events p` 491 by (`PREIMAGE X {FST x} INTER PREIMAGE Y {SND x} INTER p_space p = 492 (PREIMAGE X {FST x} INTER p_space p) INTER 493 (PREIMAGE Y {SND x} INTER p_space p)` 494 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER] >> DECIDE_TAC) 495 >> POP_ORW 496 >> FULL_SIMP_TAC std_ss [random_variable_def, prob_space_def, measure_space_def, 497 events_def] 498 >> METIS_TAC [sigma_algebra_def, ALGEBRA_INTER, space_def, subsets_def]) 499 >> Suff `~(0 = prob p (PREIMAGE X {FST x} INTER p_space p)) /\ 500 ~(0 = prob p (PREIMAGE Y {SND x} INTER p_space p))` 501 >- METIS_TAC [random_variable_def, PROB_POSITIVE] 502 >> `0 <= prob p 503 (PREIMAGE X {FST x} INTER PREIMAGE Y {SND x} INTER 504 p_space p)` 505 by METIS_TAC [random_variable_def, PROB_POSITIVE] 506 >> (CONJ_TAC >> SPOSE_NOT_THEN STRIP_ASSUME_TAC) 507 >- (Suff `prob p (PREIMAGE (X :'a -> 'd) {FST x} INTER PREIMAGE (Y :'a -> 'e) {SND x} INTER p_space p) <= 508 prob p (PREIMAGE (X :'a -> 'd) {FST x} INTER p_space p)` 509 >- METIS_TAC [REAL_LE_ANTISYM] 510 >> MATCH_MP_TAC PROB_INCREASING 511 >> FULL_SIMP_TAC std_ss [random_variable_def, SUBSET_DEF, IN_INTER]) 512 >> Suff `prob p (PREIMAGE X {FST x} INTER PREIMAGE Y {SND x} INTER p_space p) <= 513 prob p (PREIMAGE Y {SND x} INTER p_space p)` 514 >- METIS_TAC [REAL_LE_ANTISYM] 515 >> MATCH_MP_TAC PROB_INCREASING 516 >> FULL_SIMP_TAC std_ss [random_variable_def, SUBSET_DEF, IN_INTER]); 517 518val finite_entropy_reduce = store_thm 519 ("finite_entropy_reduce", 520 ``!b p X. (POW (p_space p) = events p) /\ 521 random_variable X p (IMAGE X (p_space p), POW (IMAGE X (p_space p))) /\ 522 FINITE (p_space p) ==> 523 (entropy b p (IMAGE X (p_space p), POW (IMAGE X (p_space p))) X = 524 ~ SIGMA (\x. distribution p X {x} * logr b (distribution p X {x})) 525 (IMAGE X (p_space p)))``, 526 RW_TAC std_ss [entropy_def, finite_mutual_information_reduce] 527 >> `(IMAGE X (p_space p) CROSS IMAGE X (p_space p)) = 528 (IMAGE (\x. (x,x)) (IMAGE X (p_space p))) UNION 529 ((IMAGE X (p_space p) CROSS IMAGE X (p_space p)) DIFF 530 (IMAGE (\x. (x,x)) (IMAGE X (p_space p))))` 531 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_CROSS, IN_IMAGE, IN_UNION, IN_DIFF] 532 >> EQ_TAC 533 >- (RW_TAC std_ss [] >> `x = (FST x, SND x)` by RW_TAC std_ss [PAIR] 534 >> POP_ORW >> METIS_TAC []) 535 >> RW_TAC std_ss [FST, SND] >> RW_TAC std_ss [FST, SND] 536 >> METIS_TAC []) 537 >> POP_ORW 538 >> `DISJOINT (IMAGE (\x. (x,x)) (IMAGE X (p_space p))) 539 (IMAGE X (p_space p) CROSS IMAGE X (p_space p) DIFF 540 IMAGE (\x. (x,x)) (IMAGE X (p_space p)))` 541 by (RW_TAC std_ss [IN_DISJOINT, IN_DIFF] >> DECIDE_TAC) 542 >> ASM_SIMP_TAC std_ss [REAL_SUM_IMAGE_DISJOINT_UNION, FINITE_CROSS, IMAGE_FINITE, FINITE_DIFF] 543 >> `FINITE (IMAGE X (p_space p) CROSS IMAGE X (p_space p) DIFF 544 IMAGE (\x. (x,x)) (IMAGE X (p_space p)))` 545 by RW_TAC std_ss [FINITE_CROSS, IMAGE_FINITE, FINITE_DIFF] 546 >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `(IMAGE (X :'a -> 'b) (p_space p) CROSS IMAGE (X :'a -> 'b) (p_space p) DIFF 547 IMAGE (\x. (x,x)) (IMAGE (X :'a -> 'b) (p_space p)))`) REAL_SUM_IMAGE_IN_IF] 548 >> `(\x. 549 (if 550 x IN 551 IMAGE X (p_space p) CROSS IMAGE X (p_space p) DIFF 552 IMAGE (\x. (x,x)) (IMAGE X (p_space p)) 553 then 554 (\(x,y). 555 joint_distribution p X X {(x,y)} * 556 logr b 557 (joint_distribution p X X {(x,y)} / (distribution p X {x} * distribution p X {y}))) 558 x 559 else 560 0)) = (\x. 0)` 561 by (RW_TAC std_ss [FUN_EQ_THM, IN_IMAGE, IN_DIFF] >> RW_TAC std_ss [] 562 >> FULL_SIMP_TAC std_ss [IN_CROSS, IN_IMAGE] 563 >> `x = (FST x, SND x)` by RW_TAC std_ss [PAIR] 564 >> POP_ORW 565 >> ASM_SIMP_TAC std_ss [joint_distribution_def] 566 >> `PREIMAGE (\x. (X x,X x)) {x} INTER p_space p = 567 PREIMAGE X {X x'} INTER PREIMAGE X {X x''} INTER p_space p` 568 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_SING, IN_CROSS] 569 >> METIS_TAC [PAIR, FST, SND, PAIR_EQ]) 570 >> POP_ORW 571 >> Suff `PREIMAGE X {X x'} INTER PREIMAGE X {X x''} INTER p_space p = {}` 572 >- FULL_SIMP_TAC real_ss [PROB_EMPTY, random_variable_def] 573 >> ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_PREIMAGE, IN_INTER, IN_SING, NOT_IN_EMPTY] 574 >> METIS_TAC [PAIR]) 575 >> POP_ORW >> ASM_SIMP_TAC real_ss [REAL_SUM_IMAGE_0] 576 >> `INJ (\x. (x,x)) (IMAGE X (p_space p)) (IMAGE (\x. (x,x)) (IMAGE X (p_space p)))` 577 by RW_TAC std_ss [INJ_DEF, IN_IMAGE] 578 >> ASM_SIMP_TAC std_ss [IMAGE_FINITE, REAL_SUM_IMAGE_IMAGE, o_DEF] 579 >> `~SIGMA (\x. distribution p X {x} * logr b (distribution p X {x})) (IMAGE X (p_space p)) = 580 ~1 * SIGMA (\x. distribution p X {x} * logr b (distribution p X {x})) (IMAGE X (p_space p))` 581 by REAL_ARITH_TAC 582 >> POP_ORW 583 >> ASM_SIMP_TAC std_ss [GSYM REAL_SUM_IMAGE_CMUL, IMAGE_FINITE] 584 >> Suff `(\x. 585 joint_distribution p X X {(x,x)} * 586 logr b (joint_distribution p X X {(x,x)} / (distribution p X {x} * distribution p X {x}))) = 587 (\x. ~1 * (distribution p X {x} * logr b (distribution p X {x})))` 588 >- RW_TAC std_ss [] 589 >> RW_TAC std_ss [FUN_EQ_THM, joint_distribution_def, distribution_def] 590 >> `PREIMAGE (\x. (X x,X x)) {(x,x)} INTER p_space p = 591 PREIMAGE X {x} INTER p_space p` 592 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_SING, IN_CROSS, INTER_IDEMPOT] 593 >> METIS_TAC [PAIR, FST, SND, PAIR_EQ]) 594 >> POP_ORW 595 >> RW_TAC real_ss [] 596 >> Cases_on `prob p (PREIMAGE X {x} INTER p_space p) = 0` 597 >- RW_TAC real_ss [] 598 >> `0 < prob p (PREIMAGE X {x} INTER p_space p)` 599 by METIS_TAC [REAL_LT_LE, PROB_POSITIVE, random_variable_def, SUBSET_DEF, IN_POW, IN_INTER] 600 >> Suff `prob p (PREIMAGE X {x} INTER p_space p) / 601 (prob p (PREIMAGE X {x} INTER p_space p) * 602 prob p (PREIMAGE X {x} INTER p_space p)) = 603 inv (prob p (PREIMAGE X {x} INTER p_space p))` 604 >- RW_TAC std_ss [REAL_NEG_RMUL, logr_inv] 605 >> RW_TAC std_ss [real_div, REAL_INV_MUL] 606 >> RW_TAC real_ss [REAL_MUL_RINV, REAL_MUL_ASSOC]); 607 608val finite_mutual_information_reduce2 = store_thm 609 ("finite_mutual_information_reduce2", 610 ``!b p s1 s2 X Y Z. (POW (p_space p) = events p) /\ 611 random_variable X p (IMAGE X (p_space p), POW (IMAGE X (p_space p))) /\ 612 random_variable Y p (IMAGE Y (p_space p), POW (IMAGE Y (p_space p))) /\ 613 random_variable Z p (IMAGE Z (p_space p), POW (IMAGE Z (p_space p))) /\ 614 615 FINITE (p_space p) ==> 616 (mutual_information b p (IMAGE X (p_space p), POW (IMAGE X (p_space p))) 617 (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p), 618 POW (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p))) X (\x. (Y x,Z x)) = 619 SIGMA (\(x,(y,z)). (joint_distribution p X (\x. (Y x,Z x)) {(x,(y,z))}) * 620 logr b ((joint_distribution p X (\x. (Y x,Z x)) {(x,(y,z))})/ 621 ((distribution p X {x})*(distribution p (\x. (Y x,Z x)) {(y,z)})))) 622 ((IMAGE X (p_space p)) CROSS (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p))))``, 623 RW_TAC std_ss [mutual_information_def, KL_divergence_def, space_def, subsets_def] 624 >> `FINITE (IMAGE X (p_space p)) /\ FINITE (IMAGE Y (p_space p)) /\ FINITE (IMAGE Z (p_space p)) /\ 625 FINITE (IMAGE (\x. (Y x,Z x)) (p_space p))` 626 by RW_TAC std_ss [IMAGE_FINITE] 627 >> Q.ABBREV_TAC `s1 = IMAGE X (p_space p)` 628 >> Q.ABBREV_TAC `s2 = (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p))` 629 >> `FINITE s2` by METIS_TAC [FINITE_CROSS] 630 >> Know `prod_space = (s1 CROSS s2, POW (s1 CROSS s2), 631 prod_measure (s1, POW s1, distribution p X) 632 (s2, POW s2, distribution p (\x. (Y x,Z x))))` 633 >- ( Q.UNABBREV_TAC `prod_space` 634 >> RW_TAC std_ss [prod_measure_space_def, m_space_def, subsets_def, EXTENSION, subsets_def, 635 sigma_def, prod_sets_def, IN_POW, IN_BIGINTER, measurable_sets_def, 636 SUBSET_DEF, IN_CROSS, GSPECIFICATION] 637 >> RW_TAC std_ss [GSYM EXTENSION] 638 >> EQ_TAC 639 >- (RW_TAC std_ss [] 640 >> Q.PAT_X_ASSUM `!s. P ==> x IN s` (MP_TAC o Q.SPEC `POW (s1 CROSS s2)`) 641 >> RW_TAC std_ss [IN_POW, SUBSET_DEF, IN_CROSS, POW_SIGMA_ALGEBRA] 642 >> Suff `(!x''. (?x'. (x'',T) = (\(s,t). (s CROSS t, 643 (!x. x IN s ==> x IN s1) /\ !x. x IN t ==> x IN s2)) 644 x') ==> !x. x IN x'' ==> FST x IN s1 /\ SND x IN s2)` >- METIS_TAC [] 645 >> RW_TAC std_ss [] 646 >> (MP_TAC o Q.ISPEC `(x''' :('d -> bool) # ('e # 'f -> bool))`) pair_CASES 647 >> STRIP_TAC >> FULL_SIMP_TAC std_ss [] >> METIS_TAC [IN_CROSS]) 648 649 >> RW_TAC std_ss [] 650 >> `x = BIGUNION (IMAGE (\a. {a}) x)` 651 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_BIGUNION_IMAGE, IN_SING]) 652 >> POP_ORW 653 >> FULL_SIMP_TAC std_ss [SIGMA_ALGEBRA, subsets_def] 654 >> POP_ASSUM MATCH_MP_TAC 655 >> CONJ_TAC 656 >- (MATCH_MP_TAC FINITE_COUNTABLE >> MATCH_MP_TAC IMAGE_FINITE 657 >> (MP_TAC o Q.ISPEC `(s1 :'d -> bool) CROSS (s2 :'e # 'f -> bool)`) SUBSET_FINITE 658 >> RW_TAC std_ss [FINITE_CROSS] 659 >> POP_ASSUM MATCH_MP_TAC 660 >> METIS_TAC [SUBSET_DEF, IN_CROSS]) 661 >> RW_TAC std_ss [SUBSET_DEF, IN_IMAGE, IN_SING] 662 >> Q.PAT_X_ASSUM `!x. Q ==> x IN P` MATCH_MP_TAC 663 >> Q.EXISTS_TAC `({FST a}, {SND a})` >> RW_TAC std_ss [PAIR_EQ, IN_SING] 664 >> ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_CROSS, IN_SING] 665 >> METIS_TAC [PAIR_EQ, PAIR, FST, SND] ) 666 >> DISCH_TAC 667 >> POP_ORW 668 >> RW_TAC std_ss [PSPACE, EVENTS] 669 >> `random_variable (\x. (Y x, Z x)) p (s2, POW(s2))` 670 by (RW_TAC std_ss [random_variable_def] 671 >- FULL_SIMP_TAC std_ss [random_variable_def] 672 >> FULL_SIMP_TAC std_ss [IN_MEASURABLE, random_variable_def, POW_SIGMA_ALGEBRA, IN_FUNSET, 673 space_def, subsets_def, IN_POW] 674 >> CONJ_TAC >- (Q.UNABBREV_TAC `s2` >> RW_TAC std_ss [IN_IMAGE, IN_CROSS] >> METIS_TAC []) 675 >> METIS_TAC [SUBSET_DEF, IN_INTER, IN_POW]) 676 >> (MP_TAC o Q.SPECL [`p`, `s1`, `s2`, `X`, `(\x. (Y x,Z x))`] o 677 INST_TYPE [``:'c`` |-> ``:'e#'f``, ``:'b``|->``:'d``]) finite_marginal_product_space_POW2 678 >> ASM_SIMP_TAC std_ss [] 679 >> STRIP_TAC 680 >> (MP_TAC o Q.SPEC `(s1 CROSS s2,POW (s1 CROSS s2),joint_distribution p X (\x. (Y x,Z x)))` 681 o INST_TYPE [``:'a`` |-> ``:'d # 'e # 'f``]) finite_space_POW_integral_reduce 682 >> ASM_SIMP_TAC std_ss [m_space_def, measurable_sets_def, FINITE_CROSS] 683 >> STRIP_TAC >> POP_ASSUM (K ALL_TAC) 684 >> `FINITE (s1 CROSS s2)` 685 by RW_TAC std_ss [FINITE_CROSS] 686 >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `(s1 :'d -> bool) 687 CROSS (s2 :'e # 'f -> bool)`) REAL_SUM_IMAGE_IN_IF] 688 >> Suff `(\x. 689 (if x IN s1 CROSS s2 then 690 (\x. 691 logr b 692 (RN_deriv 693 (s1 CROSS s2,POW (s1 CROSS s2), 694 prob 695 (s1 CROSS s2,POW (s1 CROSS s2), 696 prod_measure (s1,POW s1,distribution p X) 697 (s2,POW s2,distribution p (\x. (Y x,Z x))))) 698 (joint_distribution p X (\x. (Y x,Z x))) x) * 699 measure 700 (s1 CROSS s2,POW (s1 CROSS s2), 701 joint_distribution p X (\x. (Y x,Z x))) {x}) x 702 else 703 0)) = 704 (\x. 705 (if x IN s1 CROSS s2 then 706 (\(x,y,z). 707 joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} * 708 logr b 709 (joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} / 710 (distribution p X {x} * 711 distribution p (\x. (Y x,Z x)) {(y,z)}))) x 712 else 713 0))` 714 >- RW_TAC std_ss [] 715 >> RW_TAC std_ss [FUN_EQ_THM] 716 >> RW_TAC std_ss [PROB, measure_def] 717 >> `x = (FST x, FST(SND x), SND(SND x))` by RW_TAC std_ss [PAIR] 718 >> POP_ORW 719 >> RW_TAC std_ss [] 720 >> Cases_on `joint_distribution p X (\x. (Y x,Z x)) {x} = 0` 721 >- RW_TAC real_ss [] 722 >> RW_TAC std_ss [REAL_MUL_COMM, REAL_EQ_LMUL] 723 >> Suff `(RN_deriv 724 (s1 CROSS s2,POW (s1 CROSS s2), 725 prod_measure (s1,POW s1,distribution p X) 726 (s2,POW s2,distribution p (\x. (Y x,Z x)))) 727 (joint_distribution p X (\x. (Y x,Z x))) x) = 728 (joint_distribution p X (\x. (Y x,Z x)) {x} / 729 (distribution p X {FST x} * distribution p (\x. (Y x,Z x)) {SND x}))` 730 >- RW_TAC std_ss [] 731 >> RW_TAC std_ss [RN_deriv_def] 732 >> Suff `(\f. f x = joint_distribution p X (\x. (Y x,Z x)) {x} / 733(distribution p X {FST x} * distribution p (\x. (Y x,Z x)) {SND x})) 734 (@f. 735 measure_space 736 (s1 CROSS s2,POW (s1 CROSS s2), 737 prod_measure (s1,POW s1,distribution p X) 738 (s2,POW s2,distribution p (\x. (Y x,Z x)))) /\ 739 measure_space 740 (m_space 741 (s1 CROSS s2,POW (s1 CROSS s2), 742 prod_measure (s1,POW s1,distribution p X) 743 (s2,POW s2,distribution p (\x. (Y x,Z x)))), 744 measurable_sets 745 (s1 CROSS s2,POW (s1 CROSS s2), 746 prod_measure (s1,POW s1,distribution p X) 747 (s2,POW s2,distribution p (\x. (Y x,Z x)))), 748 joint_distribution p X (\x. (Y x,Z x))) /\ 749 f IN 750 borel_measurable 751 (m_space 752 (s1 CROSS s2,POW (s1 CROSS s2), 753 prod_measure (s1,POW s1,distribution p X) 754 (s2,POW s2,distribution p (\x. (Y x,Z x)))), 755 measurable_sets 756 (s1 CROSS s2,POW (s1 CROSS s2), 757 prod_measure (s1,POW s1,distribution p X) 758 (s2,POW s2,distribution p (\x. (Y x,Z x))))) /\ 759 !a. 760 a IN 761 measurable_sets 762 (s1 CROSS s2,POW (s1 CROSS s2), 763 prod_measure (s1,POW s1,distribution p X) 764 (s2,POW s2,distribution p (\x. (Y x,Z x)))) ==> 765 (integral 766 (s1 CROSS s2,POW (s1 CROSS s2), 767 prod_measure (s1,POW s1,distribution p X) 768 (s2,POW s2,distribution p (\x. (Y x,Z x)))) 769 (\x. f x * indicator_fn a x) = 770 joint_distribution p X (\x. (Y x,Z x)) a))` 771 >- RW_TAC std_ss [] 772 >> MATCH_MP_TAC SELECT_ELIM_THM 773 >> `measure_space (s1 CROSS s2, POW (s1 CROSS s2), 774 prod_measure (s1,POW s1,distribution p X) 775 (s2,POW s2,distribution p (\x. (Y x,Z x))))` 776 by METIS_TAC [measurable_sets_def, distribution_prob_space, space_def, subsets_def, m_space_def, 777 prob_space_def, measure_space_finite_prod_measure_POW2] 778 >> RW_TAC std_ss [measurable_sets_def, m_space_def] 779 >- (Q.EXISTS_TAC `\x'. joint_distribution p X (\x. (Y x,Z x)) {x'} / 780 ((distribution p X {FST x'})*(distribution p (\x. (Y x,Z x)) {SND x'}))` 781 >> STRONG_CONJ_TAC 782 >- (`(s1 CROSS s2,POW (s1 CROSS s2)) = 783 (m_space (s1 CROSS s2,POW (s1 CROSS s2), 784 prod_measure (s1,POW s1,distribution p X) 785 (s2,POW s2,distribution p (\x. (Y x,Z x)))), 786 measurable_sets (s1 CROSS s2,POW (s1 CROSS s2), 787 prod_measure (s1,POW s1,distribution p X) 788 (s2,POW s2,distribution p (\x. (Y x,Z x)))))` by RW_TAC std_ss [measurable_sets_def, m_space_def] 789 >> POP_ORW 790 >> ASM_SIMP_TAC std_ss [borel_measurable_le_iff, IN_POW, SUBSET_DEF, GSPECIFICATION, measurable_sets_def, m_space_def]) 791 >> RW_TAC std_ss [IN_POW] 792 >> (MP_TAC o Q.SPEC `(s1 CROSS s2,POW (s1 CROSS s2), 793 prod_measure (s1,POW s1,distribution p X) 794 (s2,POW s2,distribution p (\x. (Y x,Z x))))` 795 o INST_TYPE [``:'a`` |-> ``:'d # 'e # 'f``]) finite_space_POW_integral_reduce 796 >> ASM_SIMP_TAC std_ss [m_space_def, measurable_sets_def, FINITE_CROSS, measure_def] 797 >> STRIP_TAC >> POP_ASSUM (K ALL_TAC) 798 >> `(s1 CROSS s2) = 799 a UNION ((s1 CROSS s2) DIFF a)` 800 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_UNION, IN_DIFF] >> METIS_TAC [SUBSET_DEF]) 801 >> POP_ORW 802 >> `DISJOINT a ((s1 CROSS s2) DIFF a)` 803 by (RW_TAC std_ss [IN_DISJOINT, IN_DIFF] >> DECIDE_TAC) 804 >> `FINITE a` by METIS_TAC [SUBSET_FINITE, FINITE_CROSS, IMAGE_FINITE] 805 >> RW_TAC std_ss [FINITE_DIFF, REAL_SUM_IMAGE_DISJOINT_UNION] 806 >> `FINITE ((s1 CROSS s2) DIFF a)` 807 by RW_TAC std_ss [FINITE_DIFF] 808 >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `((s1 :'d -> bool) CROSS (s2 :'e # 'f -> bool)) DIFF a`) REAL_SUM_IMAGE_IN_IF] 809 >> `(\x. (if x IN s1 CROSS s2 DIFF a then (\x'. 810 joint_distribution p X (\x. (Y x,Z x)) {x'} / 811 (distribution p X {FST x'} * 812 distribution p (\x. (Y x,Z x)) {SND x'}) * 813 indicator_fn a x' * 814 prod_measure (s1,POW s1,distribution p X) 815 (s2,POW s2,distribution p (\x. (Y x,Z x))) {x'}) x else 0)) = (\x. 0)` 816 by (RW_TAC std_ss [FUN_EQ_THM] >> RW_TAC real_ss [IN_DIFF, indicator_fn_def]) 817 >> RW_TAC real_ss [REAL_SUM_IMAGE_0] 818 >> POP_ASSUM (K ALL_TAC) 819 >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `(a :'d # 'e # 'f -> bool)`) REAL_SUM_IMAGE_IN_IF] 820 >> `(\x. (if x IN a then (\x'. 821 joint_distribution p X (\x. (Y x,Z x)) {x'} / 822 (distribution p X {FST x'} * 823 distribution p (\x. (Y x,Z x)) {SND x'}) * 824 indicator_fn a x' * 825 prod_measure (s1,POW s1,distribution p X) 826 (s2,POW s2,distribution p (\x. (Y x,Z x))) {x'}) x else 0)) = 827 (\x. if x IN a then 828 (\x. joint_distribution p X (\x. (Y x,Z x)) {x}) x else 0)` 829 by (RW_TAC std_ss [FUN_EQ_THM] >> RW_TAC real_ss [indicator_fn_def] 830 >> `{x'} = {FST x'} CROSS {SND x'}` 831 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_SING, IN_CROSS] 832 >> (MP_TAC o Q.ISPEC `(x'' :'d # 'e # 'f)`) pair_CASES 833 >> (MP_TAC o Q.ISPEC `(x' :'d # 'e # 'f)`) pair_CASES 834 >> RW_TAC std_ss []) 835 >> POP_ORW 836 >> `prod_measure (s1,POW s1,distribution p X) (s2,POW s2,distribution p (\x. (Y x,Z x))) 837 ({FST x'} CROSS {SND x'}) = 838 measure (s1,POW s1,distribution p X) {FST x'} * 839 measure (s2,POW s2,distribution p (\x. (Y x,Z x))) {SND x'}` 840 by (`measure_space (s1,POW s1,distribution p X) /\ 841 measure_space (s2,POW s2,distribution p (\x. (Y x,Z x)))` 842 by METIS_TAC [distribution_prob_space, prob_space_def, space_def, subsets_def] 843 >> `{FST x'} IN measurable_sets (s1,POW s1,distribution p X) /\ 844 {SND x'} IN measurable_sets (s2,POW s2,distribution p (\x. (Y x,Z x)))` 845 by (RW_TAC std_ss [measurable_sets_def, IN_POW, SUBSET_DEF, IN_SING] 846 >> FULL_SIMP_TAC std_ss [SUBSET_DEF, IN_CROSS]) 847 >> METIS_TAC [finite_POW_prod_measure_reduce, m_space_def, measurable_sets_def]) 848 >> POP_ORW 849 >> RW_TAC std_ss [measure_def] 850 >> Cases_on `(distribution p X {FST x'} * distribution p (\x. (Y x,Z x)) {SND x'}) = 0` 851 >- (Suff `joint_distribution p X (\x. (Y x,Z x)) ({FST x'} CROSS {SND x'}) = 0` 852 >- RW_TAC real_ss [] 853 >> `PREIMAGE X {FST x'} INTER p_space p IN events p` 854 by (Q.UNABBREV_TAC `s1` 855 >> FULL_SIMP_TAC std_ss [random_variable_def, IN_MEASURABLE, space_def, subsets_def, 856 IN_POW] 857 >> Suff `{FST x'} SUBSET IMAGE X (p_space p)` >- METIS_TAC [] 858 >> FULL_SIMP_TAC std_ss [SUBSET_DEF, IN_SING, IN_CROSS]) 859 >> `PREIMAGE (\x. (Y x,Z x)) {SND x'} INTER p_space p IN events p` 860 by (Q.UNABBREV_TAC `s2` 861 >> FULL_SIMP_TAC std_ss [random_variable_def, IN_MEASURABLE, space_def, subsets_def, 862 IN_POW] 863 >> Suff `{SND x'} SUBSET IMAGE Y (p_space p) CROSS IMAGE Z (p_space p)` >- METIS_TAC [] 864 >> FULL_SIMP_TAC std_ss [SUBSET_DEF, IN_SING, IN_CROSS]) 865 >> `(PREIMAGE X {FST x'} INTER PREIMAGE (\x. (Y x,Z x)) {SND x'} INTER p_space p) IN events p` 866 by (`PREIMAGE X {FST x'} INTER PREIMAGE (\x. (Y x,Z x)) {SND x'} INTER p_space p = 867 (PREIMAGE X {FST x'} INTER p_space p) INTER 868 (PREIMAGE (\x. (Y x,Z x)) {SND x'} INTER p_space p)` 869 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER] >> DECIDE_TAC) 870 >> POP_ORW 871 >> FULL_SIMP_TAC std_ss [random_variable_def, prob_space_def, measure_space_def, 872 events_def] 873 >> METIS_TAC [sigma_algebra_def, ALGEBRA_INTER, space_def, subsets_def]) 874 >> FULL_SIMP_TAC std_ss [REAL_ENTIRE, distribution_def, joint_distribution_def] 875 >- (`(PREIMAGE (\x. (X x,Y x,Z x)) ({FST x'} CROSS {SND x'}) INTER p_space p) = 876 (PREIMAGE X {FST x'} INTER PREIMAGE (\x. (Y x,Z x)) {SND x'} INTER p_space p)` 877 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_CROSS, IN_SING]) 878 >> POP_ORW 879 >> RW_TAC std_ss [GSYM REAL_LE_ANTISYM] 880 >- (Suff `(PREIMAGE X {FST x'} INTER PREIMAGE (\x. (Y x,Z x)) {SND x'} INTER p_space p) SUBSET 881 (PREIMAGE X {FST x'} INTER p_space p)` 882 >- METIS_TAC [REAL_LE_TRANS, REAL_LE_REFL, PROB_INCREASING, random_variable_def] 883 >> RW_TAC std_ss [SUBSET_DEF, IN_INTER]) 884 >> METIS_TAC [random_variable_def, PROB_POSITIVE]) 885 >> `(PREIMAGE (\x. (X x,Y x,Z x)) ({FST x'} CROSS {SND x'}) INTER p_space p) = 886 (PREIMAGE X {FST x'} INTER PREIMAGE (\x. (Y x,Z x)) {SND x'} INTER p_space p)` 887 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_CROSS, IN_SING]) 888 >> POP_ORW 889 >> RW_TAC std_ss [GSYM REAL_LE_ANTISYM] 890 >- (Suff `(PREIMAGE X {FST x'} INTER PREIMAGE (\x. (Y x,Z x)) {SND x'} INTER p_space p) SUBSET 891 (PREIMAGE (\x. (Y x,Z x)) {SND x'} INTER p_space p)` 892 >- METIS_TAC [REAL_LE_TRANS, REAL_LE_REFL, PROB_INCREASING, random_variable_def] 893 >> RW_TAC std_ss [SUBSET_DEF, IN_INTER]) 894 >> METIS_TAC [random_variable_def, PROB_POSITIVE]) 895 >> RW_TAC std_ss [real_div, measure_def] 896 >> `joint_distribution p X (\x. (Y x,Z x)) ({FST x'} CROSS {SND x'}) * 897 inv (distribution p X {FST x'} * distribution p (\x. (Y x,Z x)) {SND x'}) * 898 (distribution p X {FST x'} * distribution p (\x. (Y x,Z x)) {SND x'}) = 899 (inv (distribution p X {FST x'} * distribution p (\x. (Y x,Z x)) {SND x'}) * 900 (distribution p X {FST x'} * distribution p (\x. (Y x,Z x)) {SND x'})) * 901 joint_distribution p X (\x. (Y x,Z x)) ({FST x'} CROSS {SND x'})` 902 by REAL_ARITH_TAC 903 >> POP_ORW 904 >> RW_TAC real_ss [REAL_MUL_LINV]) 905 >> POP_ORW 906 >> ASM_SIMP_TAC std_ss [GSYM REAL_SUM_IMAGE_IN_IF] 907 >> Q.PAT_X_ASSUM `a SUBSET s1 CROSS s2` MP_TAC 908 >> CONV_TAC (UNBETA_CONV ``(a :'d # 'e # 'f -> bool)``) 909 >> Q.PAT_X_ASSUM `FINITE a` MP_TAC 910 >> Q.SPEC_TAC (`a`,`a`) 911 >> MATCH_MP_TAC FINITE_INDUCT 912 >> FULL_SIMP_TAC std_ss [random_variable_def] 913 >> RW_TAC std_ss [REAL_SUM_IMAGE_THM, PREIMAGE_EMPTY, INTER_EMPTY, PROB_EMPTY, DELETE_NON_ELEMENT, 914 joint_distribution_def] 915 >> `(PREIMAGE (\x. (X x,Y x,Z x)) (e INSERT s) INTER p_space p) = 916 (PREIMAGE (\x. (X x,Y x,Z x)) {e} INTER p_space p) UNION 917 (PREIMAGE (\x. (X x,Y x,Z x)) s INTER p_space p)` 918 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_UNION, IN_INTER, IN_PREIMAGE, IN_INSERT, NOT_IN_EMPTY] 919 >> FULL_SIMP_TAC std_ss [GSYM DELETE_NON_ELEMENT, SUBSET_DEF, IN_INSERT, IN_CROSS, IN_IMAGE] 920 >> METIS_TAC [FST, SND]) 921 >> POP_ORW 922 >> `DISJOINT (PREIMAGE (\x. (X x,Y x,Z x)) {e} INTER p_space p) 923 (PREIMAGE (\x. (X x,Y x,Z x)) s INTER p_space p)` 924 by (RW_TAC std_ss [IN_DISJOINT, IN_INTER, IN_PREIMAGE, IN_SING] 925 >> FULL_SIMP_TAC std_ss [GSYM DELETE_NON_ELEMENT] 926 >> METIS_TAC []) 927 >> `prob p (PREIMAGE (\x. (X x,Y x,Z x)) {e} INTER p_space p UNION 928 PREIMAGE (\x. (X x,Y x,Z x)) s INTER p_space p) = 929 prob p (PREIMAGE (\x. (X x,Y x,Z x)) {e} INTER p_space p) + 930 prob p (PREIMAGE (\x. (X x,Y x,Z x)) s INTER p_space p)` 931 by (MATCH_MP_TAC PROB_ADDITIVE 932 >> RW_TAC std_ss [] 933 >> METIS_TAC [SUBSET_DEF, IN_INTER, IN_POW]) 934 >> POP_ORW 935 >> FULL_SIMP_TAC std_ss [SUBSET_DEF, IN_INSERT]) 936 >> POP_ASSUM (MP_TAC o Q.SPEC `{x}`) 937 >> RW_TAC std_ss [IN_POW, SUBSET_DEF, IN_SING, finite_space_POW_integral_reduce, 938 FINITE_CROSS, IMAGE_FINITE, measurable_sets_def, m_space_def, 939 measure_def] 940 >> POP_ASSUM MP_TAC 941 >> `(s1 CROSS s2) = 942 x INSERT ((s1 CROSS s2) DELETE x)` 943 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INSERT, IN_DELETE] >> METIS_TAC []) 944 >> POP_ORW 945 >> ASM_SIMP_TAC std_ss [REAL_SUM_IMAGE_THM, FINITE_CROSS, FINITE_DELETE, DELETE_DELETE] 946 >> `FINITE (s1 CROSS s2 DELETE x)` 947 by RW_TAC std_ss [FINITE_DELETE, FINITE_CROSS] 948 >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `((s1 :'d -> bool) CROSS 949 (s2 :'e # 'f -> bool) DELETE (x :'d # 'e # 'f))`) REAL_SUM_IMAGE_IN_IF] 950 >> `(\x''. 951 (if x'' IN s1 CROSS s2 DELETE x then 952 (\x''. 953 x' x'' * indicator_fn {x} x'' * 954 prod_measure (s1,POW s1,distribution p X) 955 (s2,POW s2,distribution p (\x. (Y x,Z x))) {x''}) x'' 956 else 957 0)) = (\x'. 0)` 958 by (RW_TAC std_ss [FUN_EQ_THM] >> RW_TAC real_ss [indicator_fn_def, IN_SING, IN_DELETE]) 959 >> POP_ORW 960 >> ASM_SIMP_TAC real_ss [REAL_SUM_IMAGE_0, indicator_fn_def, IN_SING] 961 >> `{(x :'d # 'e # 'f)} = {FST x} CROSS {SND x}` 962 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_SING, IN_CROSS] 963 >> (MP_TAC o Q.ISPEC `(x'' :'d # 'e # 'f)`) pair_CASES 964 >> (MP_TAC o Q.ISPEC `(x :'d # 'e # 'f)`) pair_CASES 965 >> RW_TAC std_ss []) 966 >> FULL_SIMP_TAC std_ss [] >> POP_ASSUM (K ALL_TAC) 967 >> `prod_measure (s1,POW s1,distribution p X) (s2,POW s2,distribution p (\x. (Y x,Z x))) 968 ({FST x} CROSS {SND x}) = 969 measure (s1,POW s1,distribution p X) {FST x} * 970 measure (s2,POW s2,distribution p (\x. (Y x,Z x))) {SND x}` 971 by (`measure_space (s1,POW s1,distribution p X) /\ 972 measure_space (s2,POW s2,distribution p (\x. (Y x,Z x)))` 973 by METIS_TAC [distribution_prob_space, prob_space_def, space_def, subsets_def] 974 >> `{FST x} IN measurable_sets (s1,POW s1,distribution p X) /\ 975 {SND x} IN measurable_sets (s2,POW s2,distribution p (\x. (Y x,Z x)))` 976 by (RW_TAC std_ss [measurable_sets_def, IN_POW, SUBSET_DEF, IN_SING] 977 >> FULL_SIMP_TAC std_ss [SUBSET_DEF, IN_CROSS]) 978 >> METIS_TAC [finite_POW_prod_measure_reduce, m_space_def, measurable_sets_def]) 979 >> POP_ORW 980 >> `x = (FST x, SND x)` by RW_TAC std_ss [PAIR] 981 >> POP_ORW 982 >> FULL_SIMP_TAC std_ss [measure_def, joint_distribution_def, distribution_def] 983 >> `PREIMAGE (\x. (X x,Y x,Z x)) ({FST x} CROSS {SND x}) INTER 984 p_space p = 985 PREIMAGE X {FST x} INTER PREIMAGE (\x. (Y x,Z x)) {SND x} INTER p_space p` 986 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_SING, IN_CROSS]) 987 >> FULL_SIMP_TAC std_ss [] >> POP_ASSUM (K ALL_TAC) 988 >> POP_ORW 989 >> Suff `0 < (prob p (PREIMAGE X {FST x} INTER p_space p) * 990 prob p (PREIMAGE (\x. (Y x,Z x)) {SND x} INTER p_space p))` 991 >- RW_TAC real_ss [REAL_EQ_RDIV_EQ] 992 >> MATCH_MP_TAC REAL_LT_MUL 993 >> SIMP_TAC std_ss [REAL_LT_LE] 994 >> `PREIMAGE X {FST x} INTER p_space p IN events p` 995 by (Q.UNABBREV_TAC `s1` 996 >> FULL_SIMP_TAC std_ss [random_variable_def, IN_MEASURABLE, space_def, subsets_def, 997 IN_POW] 998 >> Suff `{FST x} SUBSET IMAGE X (p_space p)` >- METIS_TAC [] 999 >> FULL_SIMP_TAC std_ss [SUBSET_DEF, IN_SING, IN_CROSS]) 1000 >> `PREIMAGE (\x. (Y x,Z x)) {SND x} INTER p_space p IN events p` 1001 by (Q.UNABBREV_TAC `s2` 1002 >> FULL_SIMP_TAC std_ss [random_variable_def, IN_MEASURABLE, space_def, subsets_def, 1003 IN_POW] 1004 >> Suff `{SND x} SUBSET IMAGE Y (p_space p) CROSS IMAGE Z (p_space p)` >- METIS_TAC [] 1005 >> FULL_SIMP_TAC std_ss [SUBSET_DEF, IN_SING, IN_CROSS]) 1006 >> `(PREIMAGE X {FST x} INTER PREIMAGE (\x. (Y x,Z x)) {SND x} INTER p_space p) IN events p` 1007 by (`PREIMAGE X {FST x} INTER PREIMAGE (\x. (Y x,Z x)) {SND x} INTER p_space p = 1008 (PREIMAGE X {FST x} INTER p_space p) INTER 1009 (PREIMAGE (\x. (Y x,Z x)) {SND x} INTER p_space p)` 1010 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER] >> DECIDE_TAC) 1011 >> POP_ORW 1012 >> FULL_SIMP_TAC std_ss [random_variable_def, prob_space_def, measure_space_def, 1013 events_def] 1014 >> METIS_TAC [sigma_algebra_def, ALGEBRA_INTER, space_def, subsets_def]) 1015 >> Suff `~(0 = prob p (PREIMAGE X {FST x} INTER p_space p)) /\ 1016 ~(0 = prob p (PREIMAGE (\x. (Y x,Z x)) {SND x} INTER p_space p))` 1017 >- METIS_TAC [random_variable_def, PROB_POSITIVE] 1018 >> `0 <= prob p 1019 (PREIMAGE X {FST x} INTER PREIMAGE (\x. (Y x,Z x)) {SND x} INTER 1020 p_space p)` 1021 by METIS_TAC [random_variable_def, PROB_POSITIVE] 1022 >> (CONJ_TAC >> SPOSE_NOT_THEN STRIP_ASSUME_TAC) 1023 >- (Suff `prob p (PREIMAGE (X :'a -> 'd) {FST x} INTER PREIMAGE ((\x. (Y x,Z x)) :'a -> 'e # 'f) {SND x} INTER p_space p) <= 1024 prob p (PREIMAGE (X :'a -> 'd) {FST x} INTER p_space p)` 1025 >- METIS_TAC [REAL_LE_ANTISYM] 1026 >> MATCH_MP_TAC PROB_INCREASING 1027 >> FULL_SIMP_TAC std_ss [random_variable_def, SUBSET_DEF, IN_INTER]) 1028 >> Suff `prob p (PREIMAGE X {FST x} INTER PREIMAGE (\x. (Y x,Z x)) {SND x} INTER p_space p) <= 1029 prob p (PREIMAGE (\x. (Y x,Z x)) {SND x} INTER p_space p)` 1030 >- METIS_TAC [REAL_LE_ANTISYM] 1031 >> MATCH_MP_TAC PROB_INCREASING 1032 >> FULL_SIMP_TAC std_ss [random_variable_def, SUBSET_DEF, IN_INTER]); 1033 1034val finite_conditional_mutual_information_reduce = store_thm 1035 ("finite_conditional_mutual_information_reduce", 1036 ``!b p X Y Z. (POW (p_space p) = events p) /\ 1037 random_variable X p (IMAGE X (p_space p), POW (IMAGE X (p_space p))) /\ 1038 random_variable Y p (IMAGE Y (p_space p), POW (IMAGE Y (p_space p))) /\ 1039 random_variable Z p (IMAGE Z (p_space p), POW (IMAGE Z (p_space p))) /\ 1040 FINITE (p_space p) ==> 1041 (conditional_mutual_information b p (IMAGE X (p_space p), POW (IMAGE X (p_space p))) 1042 (IMAGE Y (p_space p), POW (IMAGE Y (p_space p))) 1043 (IMAGE Z (p_space p), POW (IMAGE Z (p_space p))) 1044 X Y Z = 1045 ~ SIGMA (\(x,z). joint_distribution p X Z {(x,z)} * 1046 logr b (joint_distribution p X Z {(x,z)} / distribution p Z {z})) 1047 (IMAGE X (p_space p) CROSS IMAGE Z (p_space p)) 1048 - 1049 ~ SIGMA (\(x,(y,z)). joint_distribution p X (\x. (Y x, Z x)) {(x,(y,z))} * 1050 logr b (joint_distribution p X (\x. (Y x, Z x)) {(x,(y,z))}/ 1051 distribution p (\x. (Y x, Z x)) {(y,z)})) 1052 ((IMAGE X (p_space p)) CROSS (IMAGE (\x. (Y x, Z x)) (p_space p))))``, 1053 REPEAT STRIP_TAC 1054 >> `random_variable (\x. (Y x, Z x)) p (IMAGE (\x. (Y x, Z x)) (p_space p), POW (IMAGE (\x. (Y x, Z x)) (p_space p)))` 1055 by (RW_TAC std_ss [random_variable_def] 1056 >- FULL_SIMP_TAC std_ss [random_variable_def] 1057 >> FULL_SIMP_TAC std_ss [IN_MEASURABLE, random_variable_def, POW_SIGMA_ALGEBRA, IN_FUNSET, space_def, 1058 subsets_def, IN_POW] 1059 >> CONJ_TAC >- (RW_TAC std_ss [IN_IMAGE] >> METIS_TAC []) 1060 >> METIS_TAC [SUBSET_DEF, IN_INTER, IN_POW]) 1061 >> RW_TAC std_ss [conditional_mutual_information_def, finite_mutual_information_reduce, space_def, subsets_def] 1062 >> Know `prod_space = 1063 ((IMAGE Y (p_space p)) CROSS (IMAGE Z (p_space p)), POW ((IMAGE Y (p_space p)) CROSS (IMAGE Z (p_space p))), 1064 prod_measure (IMAGE Y (p_space p), POW (IMAGE Y (p_space p)), distribution p Y) 1065 (IMAGE Z (p_space p), POW (IMAGE Z (p_space p)), distribution p Z))` 1066 >- ( Q.UNABBREV_TAC `prod_space` 1067 >> Q.ABBREV_TAC `s1 = IMAGE Y (p_space p)` >> Q.ABBREV_TAC `s2 = IMAGE Z (p_space p)` 1068 >> RW_TAC std_ss [prod_measure_space_def, m_space_def, subsets_def, EXTENSION, subsets_def, 1069 sigma_def, prod_sets_def, IN_POW, IN_BIGINTER, measurable_sets_def, 1070 SUBSET_DEF, IN_CROSS, GSPECIFICATION] 1071 >> RW_TAC std_ss [GSYM EXTENSION] 1072 >> EQ_TAC 1073 >- (ASM_SIMP_TAC std_ss [] >> STRIP_TAC 1074 >> Q.PAT_X_ASSUM `!s. Q ==> x IN P` (MP_TAC o Q.SPEC `POW (s1 CROSS s2)`) 1075 1076 >> ASM_SIMP_TAC std_ss [IN_POW, SUBSET_DEF, IN_CROSS, POW_SIGMA_ALGEBRA] 1077 >> STRIP_TAC >> POP_ASSUM MATCH_MP_TAC 1078 >> NTAC 2 STRIP_TAC >> POP_ASSUM MP_TAC 1079 >> `(x' :('c -> bool) # ('d -> bool)) = (FST x', SND x')` by RW_TAC std_ss [PAIR] 1080 >> POP_ORW >> RW_TAC std_ss [IN_CROSS] ) 1081 >> RW_TAC std_ss [] 1082 >> `x = BIGUNION (IMAGE (\a. {a}) x)` 1083 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_BIGUNION_IMAGE, IN_SING]) 1084 >> POP_ORW 1085 >> FULL_SIMP_TAC std_ss [SIGMA_ALGEBRA, subsets_def] 1086 >> POP_ASSUM MATCH_MP_TAC 1087 >> CONJ_TAC 1088 >- (MATCH_MP_TAC FINITE_COUNTABLE >> MATCH_MP_TAC IMAGE_FINITE 1089 >> (MP_TAC o Q.ISPEC `(s1 :'c -> bool) CROSS (s2 :'d -> bool)`) SUBSET_FINITE 1090 >> Q.UNABBREV_TAC `s1` >> Q.UNABBREV_TAC `s2` 1091 >> RW_TAC std_ss [FINITE_CROSS, IMAGE_FINITE] 1092 >> POP_ASSUM MATCH_MP_TAC 1093 >> METIS_TAC [SUBSET_DEF, IN_CROSS]) 1094 >> RW_TAC std_ss [SUBSET_DEF, IN_IMAGE, IN_SING] 1095 >> Q.PAT_X_ASSUM `!x. Q ==> x IN P` MATCH_MP_TAC 1096 >> Q.EXISTS_TAC `({FST a}, {SND a})` >> RW_TAC std_ss [PAIR_EQ, IN_SING] 1097 >> ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_CROSS, IN_SING] 1098 >> METIS_TAC [PAIR_EQ, PAIR, FST, SND]) 1099 >> DISCH_TAC 1100 >> RW_TAC std_ss [conditional_mutual_information_def, finite_mutual_information_reduce2, 1101 PSPACE, EVENTS] 1102 >> `!a. a INTER p_space p IN events p` 1103 by METIS_TAC [IN_INTER, IN_POW, SUBSET_DEF] 1104 >> `!a. 0 <= prob p (a INTER p_space p)` 1105 by METIS_TAC [random_variable_def, PROB_POSITIVE] 1106 >> `!a b. (prob p (b INTER p_space p) = 0) ==> 1107 (prob p (a INTER b INTER p_space p) = 0)` 1108 by (RW_TAC std_ss [] >> ONCE_REWRITE_TAC [GSYM REAL_LE_ANTISYM] 1109 >> RW_TAC std_ss [] >> POP_ASSUM (fn thm => ONCE_REWRITE_TAC [GSYM thm]) 1110 >> MATCH_MP_TAC PROB_INCREASING 1111 >> FULL_SIMP_TAC std_ss [random_variable_def, SUBSET_DEF, IN_INTER]) 1112 >> `FINITE (IMAGE X (p_space p) CROSS IMAGE Z (p_space p))` 1113 by RW_TAC std_ss [IMAGE_FINITE, FINITE_CROSS] 1114 >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `(IMAGE (X :'a -> 'b) (p_space p) 1115 CROSS IMAGE (Z :'a -> 'd) (p_space p))`) REAL_SUM_IMAGE_IN_IF] 1116 >> `(\x. 1117 (if x IN IMAGE X (p_space p) CROSS IMAGE Z (p_space p) then 1118 (\(x,y). 1119 joint_distribution p X Z {(x,y)} * 1120 logr b 1121 (joint_distribution p X Z {(x,y)} / (distribution p X {x} * distribution p Z {y}))) 1122 x 1123 else 1124 0)) = 1125 (\x. if x IN IMAGE X (p_space p) CROSS IMAGE Z (p_space p) then 1126 (\(x,z). joint_distribution p X Z {(x,z)} * logr b (joint_distribution p X Z {(x,z)} / 1127 (distribution p Z {z})) 1128 - joint_distribution p X Z {(x,z)} * logr b (distribution p X {x})) x 1129 else 0)` 1130 by (RW_TAC std_ss [FUN_EQ_THM, IN_IMAGE, IN_CROSS] >> RW_TAC std_ss [] 1131 >> (MP_TAC o Q.ISPEC `x:'b#'d`) pair_CASES >> RW_TAC std_ss [] 1132 >> FULL_SIMP_TAC std_ss [] 1133 >> RW_TAC std_ss [GSYM REAL_SUB_LDISTRIB] 1134 >> `joint_distribution p X Z {(X x',Z x'')} / 1135 (distribution p X {X x'} * distribution p Z {Z x''}) = 1136 (joint_distribution p X Z {(X x',Z x'')} / 1137 (distribution p Z {Z x''})) / 1138 (distribution p X {X x'})` 1139 by (RW_TAC std_ss [real_div] 1140 >> Cases_on `distribution p X {X x'} = 0` 1141 >- (FULL_SIMP_TAC real_ss [joint_distribution_def, distribution_def] 1142 >> `PREIMAGE (\x. (X x,Z x)) {(X x',Z x'')} INTER p_space p = 1143 PREIMAGE X {X x'} INTER PREIMAGE Z {Z x''} INTER p_space p` 1144 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_SING]) 1145 >> POP_ORW 1146 >> `PREIMAGE X {X x'} INTER PREIMAGE Z {Z x''} INTER p_space p = 1147 PREIMAGE Z {Z x''} INTER PREIMAGE X {X x'} INTER p_space p` 1148 by METIS_TAC [INTER_ASSOC, INTER_COMM] 1149 >> POP_ORW 1150 >> RW_TAC real_ss []) 1151 >> Cases_on `distribution p Z {Z x''} = 0` 1152 >- (FULL_SIMP_TAC real_ss [joint_distribution_def, distribution_def] 1153 >> `PREIMAGE (\x. (X x,Z x)) {(X x',Z x'')} INTER p_space p = 1154 PREIMAGE X {X x'} INTER PREIMAGE Z {Z x''} INTER p_space p` 1155 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_SING]) 1156 >> ASM_SIMP_TAC real_ss []) 1157 >> RW_TAC real_ss [REAL_INV_MUL] 1158 >> REAL_ARITH_TAC) 1159 >> POP_ORW 1160 >> Cases_on `distribution p X {X x'} = 0` 1161 >- (FULL_SIMP_TAC real_ss [joint_distribution_def, distribution_def] 1162 >> `PREIMAGE (\x. (X x,Z x)) {(X x',Z x'')} INTER p_space p = 1163 PREIMAGE X {X x'} INTER PREIMAGE Z {Z x''} INTER p_space p` 1164 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_SING]) 1165 >> POP_ORW 1166 >> `PREIMAGE X {X x'} INTER PREIMAGE Z {Z x''} INTER p_space p = 1167 PREIMAGE Z {Z x''} INTER PREIMAGE X {X x'} INTER p_space p` 1168 by METIS_TAC [INTER_ASSOC, INTER_COMM] 1169 >> POP_ORW 1170 >> RW_TAC real_ss []) 1171 >> Cases_on `joint_distribution p X Z {(X x',Z x'')} / distribution p Z {Z x''} = 0` 1172 >- (FULL_SIMP_TAC real_ss [joint_distribution_def, distribution_def] 1173 >> `PREIMAGE (\x. (X x,Z x)) {(X x',Z x'')} INTER p_space p = 1174 PREIMAGE X {X x'} INTER PREIMAGE Z {Z x''} INTER p_space p` 1175 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_SING]) 1176 >> FULL_SIMP_TAC std_ss [] 1177 >> Cases_on `prob p (PREIMAGE Z {Z x''} INTER p_space p) = 0` 1178 >- RW_TAC real_ss [] 1179 >> `0 < prob p (PREIMAGE Z {Z x''} INTER p_space p)` 1180 by METIS_TAC [REAL_LT_LE] 1181 >> FULL_SIMP_TAC real_ss [REAL_EQ_LDIV_EQ]) 1182 >> `0 < distribution p X {X x'}` 1183 by RW_TAC std_ss [distribution_def, REAL_LT_LE] 1184 >> `0 < joint_distribution p X Z {(X x',Z x'')} / distribution p Z {Z x''}` 1185 by RW_TAC std_ss [REAL_LT_LE, joint_distribution_def, distribution_def, REAL_LE_DIV] 1186 >> RW_TAC std_ss [logr_div]) 1187 >> POP_ORW 1188 >> ASM_SIMP_TAC std_ss [GSYM REAL_SUM_IMAGE_IN_IF] 1189 >> RW_TAC std_ss [REAL_SUB_RNEG] 1190 >> `!x y. ~ x + y = y - x` 1191 by REAL_ARITH_TAC 1192 >> POP_ORW 1193 >> `SIGMA 1194 (\x. 1195 (\(x,z). 1196 joint_distribution p X Z {(x,z)} * 1197 logr b (joint_distribution p X Z {(x,z)} / distribution p Z {z}) - 1198 joint_distribution p X Z {(x,z)} * logr b (distribution p X {x})) x) 1199 (IMAGE X (p_space p) CROSS IMAGE Z (p_space p)) = 1200 SIGMA 1201 (\x. 1202 (\(x,z). 1203 joint_distribution p X Z {(x,z)} * 1204 logr b (joint_distribution p X Z {(x,z)} / distribution p Z {z})) x) 1205 (IMAGE X (p_space p) CROSS IMAGE Z (p_space p)) - 1206 SIGMA 1207 (\x. 1208 (\(x,z). joint_distribution p X Z {(x,z)} * logr b (distribution p X {x})) x) 1209 (IMAGE X (p_space p) CROSS IMAGE Z (p_space p))` 1210 by (RW_TAC std_ss [real_sub] 1211 >> `(\x. (\(x,z). 1212 joint_distribution p X Z {(x,z)} * 1213 logr b (joint_distribution p X Z {(x,z)} / distribution p Z {z}) + 1214 ~(joint_distribution p X Z {(x,z)} * logr b (distribution p X {x}))) x) = 1215 (\x. (\(x,z). 1216 joint_distribution p X Z {(x,z)} * 1217 logr b (joint_distribution p X Z {(x,z)} / distribution p Z {z})) x + 1218 (\(x,z). ~(joint_distribution p X Z {(x,z)} * logr b (distribution p X {x}))) x)` 1219 by (RW_TAC std_ss [FUN_EQ_THM] >> RW_TAC std_ss [] 1220 >> (MP_TAC o Q.ISPEC `x:'b#'d`) pair_CASES >> RW_TAC std_ss [] 1221 >> FULL_SIMP_TAC std_ss []) 1222 >> POP_ORW 1223 >> RW_TAC std_ss [REAL_SUM_IMAGE_ADD, FINITE_CROSS, IMAGE_FINITE, REAL_EQ_LADD] 1224 >> `(\x. (\(x,z). ~(joint_distribution p X Z {(x,z)} * logr b (distribution p X {x}))) x) = 1225 (\x. ~1 * (\(x,z). (joint_distribution p X Z {(x,z)} * logr b (distribution p X {x}))) x)` 1226 by (RW_TAC std_ss [FUN_EQ_THM] >> RW_TAC std_ss [] 1227 >> (MP_TAC o Q.ISPEC `x:'b#'d`) pair_CASES >> RW_TAC std_ss [] 1228 >> FULL_SIMP_TAC real_ss []) 1229 >> POP_ORW 1230 >> RW_TAC std_ss [REAL_SUM_IMAGE_CMUL, IMAGE_FINITE, FINITE_CROSS] 1231 >> RW_TAC real_ss []) 1232 >> POP_ORW 1233 >> `!(x:real) y z. x - (y - z) = (x + z) - y` 1234 by REAL_ARITH_TAC 1235 >> POP_ORW 1236 >> Suff `SIGMA 1237 (\(x,y,z). 1238 joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} * 1239 logr b 1240 (joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} / 1241 (distribution p X {x} * 1242 distribution p (\x. (Y x,Z x)) {(y,z)}))) 1243 (IMAGE X (p_space p) CROSS 1244 (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p))) + 1245SIGMA 1246 (\x. 1247 (\(x,z). 1248 joint_distribution p X Z {(x,z)} * 1249 logr b (distribution p X {x})) x) 1250 (IMAGE X (p_space p) CROSS IMAGE Z (p_space p)) = 1251 SIGMA 1252 (\(x,y,z). 1253 joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} * 1254 logr b 1255 (joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} / 1256 distribution p (\x. (Y x,Z x)) {(y,z)})) 1257 (IMAGE X (p_space p) CROSS IMAGE (\x. (Y x,Z x)) (p_space p))` 1258 >- RW_TAC std_ss [] 1259 >> `FINITE (IMAGE X (p_space p) CROSS IMAGE Z (p_space p))` 1260 by RW_TAC std_ss [FINITE_CROSS, IMAGE_FINITE] 1261 >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `(IMAGE (X :'a -> 'b) (p_space p) 1262 CROSS IMAGE (Z :'a -> 'd) (p_space p))`) REAL_SUM_IMAGE_IN_IF] 1263 >> `(\x. 1264 (if x IN IMAGE X (p_space p) CROSS IMAGE Z (p_space p) then 1265 (\x. 1266 (\(x,z). joint_distribution p X Z {(x,z)} * logr b (distribution p X {x})) x) 1267 x 1268 else 1269 0)) = 1270 (\x. if x IN IMAGE X (p_space p) CROSS IMAGE Z (p_space p) then 1271 (\x. (\(x,z). SIGMA (\y. joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} * 1272 logr b (distribution p X {x})) (IMAGE Y (p_space p))) x) x else 0)` 1273 by (RW_TAC std_ss [FUN_EQ_THM] >> RW_TAC std_ss [] 1274 >> (MP_TAC o Q.ISPEC `x:'b#'d`) pair_CASES >> RW_TAC std_ss [] 1275 >> RW_TAC std_ss [] 1276 >> `(\y. joint_distribution p X (\x. (Y x,Z x)) {(q,y,r)} * logr b (distribution p X {q})) = 1277 (\y. logr b (distribution p X {q}) * (\y. joint_distribution p X (\x. (Y x,Z x)) {(q,y,r)}) y)` 1278 by RW_TAC std_ss [REAL_MUL_COMM] 1279 >> POP_ORW 1280 >> ASM_SIMP_TAC std_ss [IMAGE_FINITE, REAL_SUM_IMAGE_CMUL] 1281 >> Suff `joint_distribution p X Z {(q,r)} = 1282 SIGMA (\y. joint_distribution p X (\x. (Y x,Z x)) {(q,y,r)}) 1283 (IMAGE Y (p_space p))` 1284 >- RW_TAC std_ss [REAL_MUL_COMM] 1285 >> RW_TAC std_ss [joint_distribution_def] 1286 >> `PREIMAGE (\x. (X x,Z x)) {(q,r)} INTER p_space p = 1287 PREIMAGE X {q} INTER PREIMAGE Z {r} INTER p_space p` 1288 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_SING]) 1289 >> POP_ORW 1290 >> `!y. PREIMAGE (\x. (X x,Y x,Z x)) {(q,y,r)} INTER p_space p = 1291 PREIMAGE X {q} INTER PREIMAGE Y {y} INTER PREIMAGE Z {r} INTER p_space p` 1292 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_SING] >> METIS_TAC []) 1293 >> POP_ORW 1294 >> `FINITE (IMAGE Y (p_space p))` by RW_TAC std_ss [IMAGE_FINITE] 1295 >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `IMAGE (Y :'a -> 'c) (p_space p)`) REAL_SUM_IMAGE_IN_IF] 1296 >> (MP_TAC o Q.ISPECL [`(p :('a -> bool) # (('a -> bool) -> bool) # (('a -> bool) -> real))`, 1297 `\(y:'c). PREIMAGE (Y:'a->'c) {y}`, 1298 `(PREIMAGE (X :'a -> 'b) {(q :'b)} INTER 1299 PREIMAGE (Z :'a -> 'd) {(r :'d)} INTER p_space p)`, 1300 `IMAGE (Y:'a->'c) (p_space p)`]) PROB_REAL_SUM_IMAGE_FN 1301 >> ASM_SIMP_TAC std_ss [IMAGE_FINITE] 1302 >> `prob_space p` by FULL_SIMP_TAC std_ss [random_variable_def] 1303 >> `(!x. x IN IMAGE Y (p_space p) ==> 1304 PREIMAGE X {q} INTER PREIMAGE Z {r} INTER p_space p INTER 1305 PREIMAGE Y {x} IN events p)` 1306 by METIS_TAC [INTER_COMM, INTER_ASSOC] 1307 >> `(!x y. x IN IMAGE Y (p_space p) /\ y IN IMAGE Y (p_space p) /\ ~(x = y) ==> 1308 DISJOINT (PREIMAGE Y {x}) (PREIMAGE Y {y}))` 1309 by (RW_TAC std_ss [IN_IMAGE, IN_DISJOINT, IN_PREIMAGE, IN_SING] >> METIS_TAC []) 1310 >> `(BIGUNION (IMAGE (\y. PREIMAGE Y {y}) (IMAGE Y (p_space p))) INTER p_space p = p_space p)` 1311 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_UNIV, IN_BIGUNION, IN_IMAGE, IN_PREIMAGE, IN_SING, IN_INTER] 1312 >> EQ_TAC >- RW_TAC std_ss [] 1313 >> RW_TAC std_ss [] >> Q.EXISTS_TAC `PREIMAGE Y {Y x}` 1314 >> RW_TAC std_ss [IN_PREIMAGE, IN_SING] >> METIS_TAC []) 1315 >> ASM_SIMP_TAC std_ss [] 1316 >> STRIP_TAC >> POP_ASSUM (K ALL_TAC) 1317 >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `IMAGE (Y :'a -> 'c) (p_space p)`) REAL_SUM_IMAGE_IN_IF] 1318 >> Suff `(\x. 1319 (if x IN IMAGE Y (p_space p) then 1320 prob p 1321 (PREIMAGE X {q} INTER PREIMAGE Z {r} INTER p_space p INTER 1322 PREIMAGE Y {x}) 1323 else 1324 0)) = 1325 (\x. 1326 (if x IN IMAGE Y (p_space p) then 1327 prob p 1328 (PREIMAGE X {q} INTER PREIMAGE Y {x} INTER 1329 PREIMAGE Z {r} INTER p_space p) 1330 else 1331 0))` 1332 >- RW_TAC std_ss [] 1333 >> RW_TAC std_ss [FUN_EQ_THM] 1334 >> RW_TAC std_ss [IN_IMAGE] 1335 >> Suff `PREIMAGE X {q} INTER PREIMAGE Z {r} INTER p_space p INTER PREIMAGE Y {Y x'} = 1336 PREIMAGE X {q} INTER PREIMAGE Y {Y x'} INTER PREIMAGE Z {r} INTER p_space p` 1337 >- RW_TAC std_ss [] 1338 >> ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_SING] >> DECIDE_TAC) 1339 >> POP_ORW 1340 >> ASM_SIMP_TAC std_ss [GSYM REAL_SUM_IMAGE_IN_IF, FINITE_CROSS, IMAGE_FINITE] 1341 >> `(\x. 1342 (\(x,z). 1343 SIGMA 1344 (\y. 1345 joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} * 1346 logr b (distribution p X {x})) (IMAGE Y (p_space p))) x) = 1347 (\x. SIGMA ((\(x,z). (\y. 1348 joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} * 1349 logr b (distribution p X {x})) ) x) (IMAGE Y (p_space p)))` 1350 by (RW_TAC std_ss [FUN_EQ_THM] >> (MP_TAC o Q.ISPEC `x:'b#'d`) pair_CASES >> RW_TAC std_ss [] 1351 >> RW_TAC std_ss []) 1352 >> POP_ORW 1353 >> ASM_SIMP_TAC std_ss [REAL_SUM_IMAGE_REAL_SUM_IMAGE, FINITE_CROSS, IMAGE_FINITE] 1354 >> `(IMAGE X (p_space p) CROSS IMAGE Z (p_space p) CROSS 1355 IMAGE Y (p_space p)) = 1356 IMAGE (\a. ((FST a, SND(SND a)), FST (SND a))) 1357 (IMAGE X (p_space p) CROSS (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p)))` 1358 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_IMAGE, IN_CROSS] 1359 >> EQ_TAC 1360 >- (RW_TAC std_ss [] >> Q.EXISTS_TAC `(FST (FST x), (SND x, SND(FST x)))` 1361 >> RW_TAC std_ss [FST,SND] >> METIS_TAC [PAIR]) 1362 >> RW_TAC std_ss [FST, SND] 1363 >> RW_TAC std_ss [FST, SND] >> METIS_TAC []) 1364 >> POP_ORW 1365 >> `INJ (\a. ((FST a,SND (SND a)),FST (SND a))) 1366 ((IMAGE X (p_space p) CROSS 1367 (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p)))) 1368 (IMAGE (\a. ((FST a,SND (SND a)),FST (SND a))) ((IMAGE X (p_space p) CROSS 1369 (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p)))))` 1370 by (RW_TAC std_ss [INJ_DEF, IN_IMAGE, IN_CROSS] >- METIS_TAC [] 1371 >> METIS_TAC [PAIR, PAIR_EQ]) 1372 >> RW_TAC std_ss [REAL_SUM_IMAGE_IMAGE, IMAGE_FINITE, FINITE_CROSS, o_DEF] 1373 >> RW_TAC std_ss [GSYM REAL_SUM_IMAGE_ADD, FINITE_CROSS, IMAGE_FINITE] 1374 >> `(IMAGE X (p_space p) CROSS 1375 (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p))) = 1376 (IMAGE X (p_space p) CROSS IMAGE (\x. (Y x,Z x)) (p_space p)) UNION 1377 (((IMAGE X (p_space p) CROSS 1378 (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p)))) DIFF 1379 (IMAGE X (p_space p) CROSS IMAGE (\x. (Y x,Z x)) (p_space p)))` 1380 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_UNION, IN_DIFF, IN_CROSS, IN_IMAGE] >> METIS_TAC [PAIR_EQ, PAIR]) 1381 >> POP_ORW 1382 >> `DISJOINT (IMAGE X (p_space p) CROSS IMAGE (\x. (Y x,Z x)) (p_space p)) 1383 ((IMAGE X (p_space p) CROSS 1384 (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p)) DIFF 1385 IMAGE X (p_space p) CROSS IMAGE (\x. (Y x,Z x)) (p_space p)))` 1386 by (RW_TAC std_ss [IN_DISJOINT, IN_DIFF] >> DECIDE_TAC) 1387 >> `SIGMA 1388 (\a. 1389 (\(x,y,z). 1390 joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} * 1391 logr b 1392 (joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} / 1393 (distribution p X {x} * 1394 distribution p (\x. (Y x,Z x)) {(y,z)}))) a + 1395 joint_distribution p X (\x. (Y x,Z x)) {a} * 1396 logr b (distribution p X {FST a})) 1397 (IMAGE X (p_space p) CROSS IMAGE (\x. (Y x,Z x)) (p_space p) UNION 1398 (IMAGE X (p_space p) CROSS 1399 (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p)) DIFF 1400 IMAGE X (p_space p) CROSS IMAGE (\x. (Y x,Z x)) (p_space p))) = 1401 SIGMA (\a. 1402 (\(x,y,z). 1403 joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} * 1404 logr b 1405 (joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} / 1406 (distribution p X {x} * 1407 distribution p (\x. (Y x,Z x)) {(y,z)}))) a + 1408 joint_distribution p X (\x. (Y x,Z x)) {a} * 1409 logr b (distribution p X {FST a})) (IMAGE X (p_space p) CROSS IMAGE (\x. (Y x,Z x)) (p_space p)) + 1410 SIGMA (\a. 1411 (\(x,y,z). 1412 joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} * 1413 logr b 1414 (joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} / 1415 (distribution p X {x} * 1416 distribution p (\x. (Y x,Z x)) {(y,z)}))) a + 1417 joint_distribution p X (\x. (Y x,Z x)) {a} * 1418 logr b (distribution p X {FST a})) ((IMAGE X (p_space p) CROSS 1419 (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p)) DIFF 1420 IMAGE X (p_space p) CROSS IMAGE (\x. (Y x,Z x)) (p_space p)))` 1421 by (MATCH_MP_TAC REAL_SUM_IMAGE_DISJOINT_UNION 1422 >> RW_TAC std_ss [FINITE_DIFF, IMAGE_FINITE, FINITE_CROSS]) 1423 >> POP_ORW 1424 >> `FINITE (IMAGE X (p_space p) CROSS 1425 (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p)) DIFF 1426 IMAGE X (p_space p) CROSS IMAGE (\x. (Y x,Z x)) (p_space p))` 1427 by RW_TAC std_ss [FINITE_DIFF, FINITE_CROSS, IMAGE_FINITE] 1428 >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `(IMAGE (X :'a -> 'b) 1429 (p_space 1430 (p : 1431 ('a -> bool) # 1432 (('a -> bool) -> bool) # (('a -> bool) -> real))) CROSS 1433 (IMAGE (Y :'a -> 'c) (p_space p) CROSS 1434 IMAGE (Z :'a -> 'd) (p_space p)) DIFF 1435 IMAGE X (p_space p) CROSS 1436 IMAGE (\(x :'a). (Y x,Z x)) (p_space p))`) REAL_SUM_IMAGE_IN_IF] 1437 >> `(\x. 1438 (if 1439 x IN 1440 IMAGE X (p_space p) CROSS 1441 (IMAGE Y (p_space p) CROSS IMAGE Z (p_space p)) DIFF 1442 IMAGE X (p_space p) CROSS IMAGE (\x. (Y x,Z x)) (p_space p) 1443 then 1444 (\a. 1445 (\(x,y,z). 1446 joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} * 1447 logr b 1448 (joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} / 1449 (distribution p X {x} * 1450 distribution p (\x. (Y x,Z x)) {(y,z)}))) a + 1451 joint_distribution p X (\x. (Y x,Z x)) {a} * 1452 logr b (distribution p X {FST a})) x 1453 else 1454 0)) = (\x. 0)` 1455 by (RW_TAC std_ss [FUN_EQ_THM, IN_DIFF, IN_CROSS, IN_IMAGE] 1456 >> (MP_TAC o Q.ISPEC `(x :'b # 'c # 'd)`) pair_CASES 1457 >> STRIP_TAC 1458 >> (MP_TAC o Q.ISPEC `(r :'c # 'd)`) pair_CASES 1459 >> STRIP_TAC 1460 >> RW_TAC std_ss [FST, SND, joint_distribution_def] 1461 >- (`PREIMAGE (\x. (X x,Y x,Z x)) {(X x',Y x'',Z x''')} INTER p_space p = {}` 1462 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [NOT_IN_EMPTY, IN_INTER, IN_PREIMAGE, IN_SING] 1463 >> METIS_TAC []) 1464 >> FULL_SIMP_TAC real_ss [random_variable_def, PROB_EMPTY]) 1465 >> `PREIMAGE (\x. (X x,Y x,Z x)) {(X x',Y x'',Z x''')} INTER p_space p = {}` 1466 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [NOT_IN_EMPTY, IN_INTER, IN_PREIMAGE, IN_SING] 1467 >> METIS_TAC []) 1468 >> FULL_SIMP_TAC real_ss [random_variable_def, PROB_EMPTY]) 1469 >> POP_ORW 1470 >> RW_TAC real_ss [REAL_SUM_IMAGE_0, FINITE_CROSS, IMAGE_FINITE, FINITE_DIFF] 1471 >> Suff `(\a. 1472 (\(x,y,z). 1473 joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} * 1474 logr b 1475 (joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} / 1476 (distribution p X {x} * 1477 distribution p (\x. (Y x,Z x)) {(y,z)}))) a + 1478 joint_distribution p X (\x. (Y x,Z x)) {a} * 1479 logr b (distribution p X {FST a})) = 1480 (\(x,y,z). 1481 joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} * 1482 logr b 1483 (joint_distribution p X (\x. (Y x,Z x)) {(x,y,z)} / 1484 distribution p (\x. (Y x,Z x)) {(y,z)}))` 1485 >- RW_TAC std_ss [] 1486 >> RW_TAC std_ss [FUN_EQ_THM] 1487 >> (MP_TAC o Q.ISPEC `(a :'b # 'c # 'd)`) pair_CASES 1488 >> STRIP_TAC 1489 >> (MP_TAC o Q.ISPEC `(r :'c # 'd)`) pair_CASES 1490 >> STRIP_TAC 1491 >> RW_TAC std_ss [GSYM REAL_ADD_LDISTRIB] 1492 >> Cases_on `distribution p X {q} = 0` 1493 >- (FULL_SIMP_TAC real_ss [joint_distribution_def, distribution_def] 1494 >> `PREIMAGE (\x. (X x,Y x,Z x)) {(q,q',r')} INTER p_space p = 1495 PREIMAGE X {q} INTER PREIMAGE (\x. (Y x,Z x)) {(q',r')} INTER 1496 p_space p` 1497 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_SING] >> METIS_TAC []) 1498 >> FULL_SIMP_TAC std_ss [] 1499 >> `PREIMAGE X {q} INTER PREIMAGE (\x. (Y x,Z x)) {(q',r')} INTER 1500 p_space p = PREIMAGE (\x. (Y x,Z x)) {(q',r')} INTER PREIMAGE X {q} INTER p_space p` 1501 by METIS_TAC [INTER_ASSOC, INTER_COMM] 1502 >> RW_TAC real_ss []) 1503 >> Cases_on `(joint_distribution p X (\x. (Y x,Z x)) {(q,q',r')} / 1504 (distribution p X {q} * distribution p (\x. (Y x,Z x)) {(q',r')})) = 0` 1505 >- (FULL_SIMP_TAC real_ss [joint_distribution_def, distribution_def] 1506 >> `PREIMAGE (\x. (X x,Y x,Z x)) {(q,q',r')} INTER p_space p = 1507 PREIMAGE X {q} INTER PREIMAGE (\x. (Y x,Z x)) {(q',r')} INTER 1508 p_space p` 1509 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_SING] >> METIS_TAC []) 1510 >> FULL_SIMP_TAC std_ss [] 1511 >> Cases_on `prob p (PREIMAGE (\x. (Y x,Z x)) {(q',r')} INTER p_space p) = 0` 1512 >- RW_TAC real_ss [] 1513 >> `0 < (prob p (PREIMAGE X {q} INTER p_space p) * 1514 prob p (PREIMAGE (\x. (Y x,Z x)) {(q',r')} INTER p_space p))` 1515 by RW_TAC std_ss [REAL_ENTIRE, REAL_LT_LE, REAL_LE_MUL] 1516 >> FULL_SIMP_TAC real_ss [REAL_EQ_LDIV_EQ]) 1517 >> `0 < distribution p X {q}` 1518 by RW_TAC std_ss [distribution_def, REAL_LT_LE] 1519 >> `0 < joint_distribution p X (\x. (Y x,Z x)) {(q,q',r')} / 1520 (distribution p X {q} * distribution p (\x. (Y x,Z x)) {(q',r')})` 1521 by RW_TAC std_ss [REAL_LT_LE, REAL_LE_DIV, REAL_LE_MUL, distribution_def, joint_distribution_def] 1522 >> RW_TAC std_ss [GSYM logr_mul] 1523 >> Suff `joint_distribution p X (\x. (Y x,Z x)) {(q,q',r')} / 1524 (distribution p X {q} * distribution p (\x. (Y x,Z x)) {(q',r')}) * distribution p X {q} = 1525 joint_distribution p X (\x. (Y x,Z x)) {(q,q',r')}/(distribution p (\x. (Y x,Z x)) {(q',r')})` 1526 >- RW_TAC std_ss [] 1527 >> RW_TAC std_ss [real_div] 1528 >> Cases_on `distribution p X {q} = 0` 1529 >- FULL_SIMP_TAC real_ss [distribution_def, joint_distribution_def] 1530 >> Cases_on `distribution p (\x. (Y x,Z x)) {(q',r')} = 0` 1531 >- (FULL_SIMP_TAC real_ss [distribution_def, joint_distribution_def] 1532 >> `PREIMAGE (\x. (X x,Y x,Z x)) {(q,q',r')} INTER p_space p = 1533 PREIMAGE X {q} INTER PREIMAGE (\x. (Y x,Z x)) {(q',r')} INTER 1534 p_space p` 1535 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_INTER, IN_PREIMAGE, IN_SING] >> METIS_TAC []) 1536 >> FULL_SIMP_TAC real_ss []) 1537 >> RW_TAC std_ss [REAL_INV_MUL] 1538 >> `joint_distribution p X (\x. (Y x,Z x)) {(q,q',r')} * 1539 (inv (distribution p X {q}) * inv (distribution p (\x. (Y x,Z x)) {(q',r')})) * 1540 distribution p X {q} = 1541 (joint_distribution p X (\x. (Y x,Z x)) {(q,q',r')} * 1542 inv (distribution p (\x. (Y x,Z x)) {(q',r')})) * 1543 (inv (distribution p X {q}) * distribution p X {q})` by REAL_ARITH_TAC 1544 >> POP_ORW 1545 >> RW_TAC real_ss [REAL_MUL_LINV]); 1546 1547 1548(* ************************************************************************* *) 1549 1550(* -------------Entropy of a RV with a certain event is zero---------------- *) 1551 1552 1553val finite_entropy_certainty_eq_0 = store_thm 1554 ("finite_entropy_certainty_eq_0", 1555 ``!b p X. 1556 (POW (p_space p) = events p) /\ 1557 random_variable X p 1558 (IMAGE X (p_space p),POW (IMAGE X (p_space p))) /\ 1559 FINITE (p_space p) /\ 1560 (?x. x IN IMAGE X (p_space p) /\ (distribution p X {x} = 1)) ==> 1561 (entropy b p (IMAGE X (p_space p),POW (IMAGE X (p_space p))) X = 0)``, 1562 RW_TAC std_ss [finite_entropy_reduce] 1563 >> `FINITE (IMAGE X (p_space p))` by RW_TAC std_ss [IMAGE_FINITE] 1564 >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `IMAGE (X :'a -> 'b) (p_space p)`) REAL_SUM_IMAGE_IN_IF] 1565 >> Suff `(\x. 1566 (if x IN IMAGE X (p_space p) then 1567 (\x. distribution p X {x} * logr b (distribution p X {x})) x 1568 else 1569 0)) = (\x. 0)` 1570 >- RW_TAC real_ss [REAL_SUM_IMAGE_0, IMAGE_FINITE] 1571 1572 >> RW_TAC std_ss [FUN_EQ_THM] 1573 >> RW_TAC std_ss [] 1574 >> Cases_on `x' = x` 1575 >- RW_TAC real_ss [logr_1] 1576 >> (MP_TAC o Q.SPECL [`X`,`p`,`x`]) distribution_x_eq_1_imp_distribution_y_eq_0 1577 >> ASM_SIMP_TAC real_ss [GSPECIFICATION]); 1578 1579 1580(* --------------- upper bound on entropy for a rv ------------------------- *) 1581 1582 1583val finite_entropy_le_card = store_thm 1584 ("finite_entropy_le_card", 1585 ``!b p X. 1 <= b /\ 1586 (POW (p_space p) = events p) /\ 1587 random_variable X p 1588 (IMAGE X (p_space p),POW (IMAGE X (p_space p))) /\ 1589 FINITE (p_space p) ==> 1590 entropy b p (IMAGE X (p_space p),POW (IMAGE X (p_space p))) X <= 1591 logr b (& (CARD ((IMAGE X (p_space p)) INTER {x| ~(distribution p X {x} = 0)})))``, 1592 RW_TAC std_ss [finite_entropy_reduce] 1593 >> `!x. ~x = ~1*x` by REAL_ARITH_TAC >> POP_ORW 1594 >> ASM_SIMP_TAC std_ss [GSYM REAL_SUM_IMAGE_CMUL, IMAGE_FINITE] 1595 >> RW_TAC real_ss [] 1596 >> ONCE_REWRITE_TAC [GSYM REAL_MUL_RNEG] 1597 >> `FINITE (IMAGE X (p_space p))` 1598 by RW_TAC std_ss [INTER_FINITE, IMAGE_FINITE] 1599 >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `(IMAGE (X :'a -> 'b) (p_space p))`) REAL_SUM_IMAGE_IN_IF] 1600 >> `(\x. 1601 (if x IN IMAGE X (p_space p) then 1602 (\x. distribution p X {x} * ~logr b (distribution p X {x})) x 1603 else 1604 0)) = 1605 (\x. 1606 (if x IN IMAGE X (p_space p) then 1607 (\x. distribution p X {x} * logr b (inv (distribution p X {x}))) x 1608 else 1609 0))` 1610 by (RW_TAC std_ss [FUN_EQ_THM] >> Cases_on `distribution p X {x} = 0` >- RW_TAC real_ss [] 1611 >> RW_TAC std_ss [IN_INTER, GSPECIFICATION, REAL_EQ_LMUL] 1612 >> MATCH_MP_TAC (GSYM logr_inv) 1613 >> RW_TAC std_ss [REAL_LT_LE, distribution_def] 1614 >> MATCH_MP_TAC PROB_POSITIVE 1615 >> METIS_TAC [random_variable_def, IN_POW, SUBSET_DEF, IN_INTER]) 1616 >> POP_ORW 1617 >> ASM_SIMP_TAC std_ss [GSYM REAL_SUM_IMAGE_IN_IF] 1618 >> Q.ABBREV_TAC `P = \x:'b. distribution p (X :'a -> 'b) {x}` 1619 >> Q.ABBREV_TAC `g = \x:'b. inv (P x)` 1620 >> `(\x. distribution p X {x} * logr b (inv (distribution p X {x}))) = 1621 (\x. P x * logr b (g x))` 1622 by (RW_TAC std_ss [FUN_EQ_THM] >> Q.UNABBREV_TAC `g` 1623 >> Q.UNABBREV_TAC `P` >> RW_TAC std_ss []) 1624 >> POP_ORW 1625 >> MATCH_MP_TAC REAL_LE_TRANS 1626 >> Q.EXISTS_TAC `logr b (SIGMA (\x. P x * g x) (IMAGE X (p_space p)))` 1627 >> CONJ_TAC 1628 >- ((MATCH_MP_TAC o UNDISCH o Q.ISPEC `(IMAGE (X :'a -> 'b) (p_space p))`) 1629 jensen_pos_concave_SIGMA 1630 >> Q.UNABBREV_TAC `g` >> Q.UNABBREV_TAC `P` 1631 >> `prob_space (IMAGE X (p_space p), POW (IMAGE X (p_space p)), distribution p X)` 1632 by METIS_TAC [distribution_prob_space, space_def, subsets_def] 1633 >> CONJ_TAC 1634 >- ((MP_TAC o GSYM o Q.ISPECL [`(IMAGE (X :'a -> 'b) 1635 (p_space 1636 (p : 1637 ('a -> bool) # 1638 (('a -> bool) -> bool) # (('a -> bool) -> real))), 1639 POW (IMAGE X (p_space p)),distribution p X)`,`IMAGE (X :'a -> 'b) (p_space p)`]) PROB_REAL_SUM_IMAGE 1640 >> RW_TAC std_ss [EVENTS, IN_POW, SUBSET_REFL, PROB, SUBSET_DEF, IN_SING] 1641 >> METIS_TAC [PROB_UNIV, PROB, PSPACE]) 1642 >> CONJ_TAC 1643 >- METIS_TAC [PROB_POSITIVE, EVENTS, IN_POW, SUBSET_DEF, IN_SING, PROB_LE_1, PROB] 1644 >> RW_TAC std_ss [REAL_INV_POS, pos_concave_logr]) 1645 >> Q.UNABBREV_TAC `g` >> Q.UNABBREV_TAC `P` 1646 >> Q.ABBREV_TAC `foo = logr b (& (CARD (IMAGE X (p_space p) INTER {x | ~(distribution p X {x} = 0)})))` 1647 >> `(IMAGE X (p_space p)) = ((IMAGE X (p_space p)) INTER {x| ~(distribution p X {x} = 0)}) UNION 1648 ((IMAGE X (p_space p)) DIFF {x| ~(distribution p X {x} = 0)})` 1649 by (ONCE_REWRITE_TAC [EXTENSION] >> RW_TAC std_ss [IN_UNION, IN_INTER, IN_DIFF] >> DECIDE_TAC) 1650 >> POP_ORW 1651 >> Q.UNABBREV_TAC `foo` 1652 >> `DISJOINT ((IMAGE X (p_space p)) INTER {x| ~(distribution p X {x} = 0)}) 1653 ((IMAGE X (p_space p)) DIFF {x| ~(distribution p X {x} = 0)})` 1654 by (RW_TAC std_ss [IN_DISJOINT, IN_INTER, IN_DIFF] >> DECIDE_TAC) 1655 >> RW_TAC std_ss [REAL_SUM_IMAGE_DISJOINT_UNION, IMAGE_FINITE, INTER_FINITE, FINITE_DIFF] 1656 >> `FINITE (IMAGE X (p_space p) DIFF {x | ~(distribution p X {x} = 0)})` 1657 by RW_TAC std_ss [FINITE_DIFF, IMAGE_FINITE] 1658 >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `(IMAGE (X :'a -> 'b) (p_space p) DIFF 1659 {x | ~(distribution p (X :'a -> 'b) {x} = (0 :real))})`) REAL_SUM_IMAGE_IN_IF] 1660 >> RW_TAC real_ss [IN_DIFF, GSPECIFICATION, REAL_SUM_IMAGE_0] 1661 >> `FINITE (IMAGE X (p_space p) INTER {x | ~(distribution p X {x} = 0)})` 1662 by RW_TAC std_ss [INTER_FINITE, IMAGE_FINITE] 1663 >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `(IMAGE (X :'a -> 'b) (p_space p) INTER 1664 {x | ~(distribution p (X :'a -> 'b) {x} = (0 :real))})`) REAL_SUM_IMAGE_IN_IF] 1665 >> `(\x. 1666 (if x IN IMAGE X (p_space p) INTER {x | ~(distribution p X {x} = 0)} then 1667 (\x. distribution p X {x} * inv (distribution p X {x})) x 1668 else 1669 0)) = 1670 (\x. 1671 (if x IN IMAGE X (p_space p) INTER {x | ~(distribution p X {x} = 0)} then 1672 1 1673 else 1674 0))` 1675 by (RW_TAC real_ss [FUN_EQ_THM] 1676 >> RW_TAC std_ss [IN_INTER, GSPECIFICATION] 1677 >> MATCH_MP_TAC REAL_MUL_RINV 1678 >> ASM_REWRITE_TAC []) 1679 >> POP_ORW 1680 >> ASM_SIMP_TAC std_ss [REAL_SUM_IMAGE_EQ_CARD, REAL_LE_REFL]); 1681 1682 1683(* --------------- entropy is maximal for a uniform rv --------------------- *) 1684 1685 1686val finite_entropy_uniform_max = store_thm 1687 ("finite_entropy_uniform_max", 1688 ``!b p X. 1689 (POW (p_space p) = events p) /\ 1690 random_variable X p 1691 (IMAGE X (p_space p),POW (IMAGE X (p_space p))) /\ 1692 FINITE (p_space p) /\ 1693 (!x y. x IN IMAGE X (p_space p) /\ y IN IMAGE X (p_space p) ==> 1694 (distribution p X {x} = distribution p X {y})) ==> 1695 (entropy b p (IMAGE X (p_space p),POW (IMAGE X (p_space p))) X = logr b (& (CARD ((IMAGE X (p_space p))))))``, 1696 RW_TAC std_ss [finite_entropy_reduce] 1697 >> `!x. ~x = ~1*x` by REAL_ARITH_TAC >> POP_ORW 1698 >> ASM_SIMP_TAC std_ss [GSYM REAL_SUM_IMAGE_CMUL, IMAGE_FINITE] 1699 >> RW_TAC real_ss [] 1700 >> ONCE_REWRITE_TAC [GSYM REAL_MUL_RNEG] 1701 >> `FINITE (IMAGE X (p_space p))` by RW_TAC std_ss [IMAGE_FINITE] 1702 1703 >> (MP_TAC o Q.SPEC `(\x. distribution p X {x})` o UNDISCH o 1704 Q.ISPEC `IMAGE (X :'a -> 'b) (p_space p)`) 1705 REAL_SUM_IMAGE_CONST_EQ_1_EQ_INV_CARD 1706 >> `prob_space (IMAGE X (p_space p), POW (IMAGE X (p_space p)), distribution p X)` 1707 by METIS_TAC [distribution_prob_space, space_def, subsets_def] 1708 >> `(SIGMA (\x. distribution p X {x}) (IMAGE X (p_space p)) = 1)` 1709 by ((MP_TAC o GSYM o Q.ISPECL [`(IMAGE (X :'a -> 'b) 1710 (p_space 1711 (p : 1712 ('a -> bool) # 1713 (('a -> bool) -> bool) # (('a -> bool) -> real))), 1714 POW (IMAGE X (p_space p)),distribution p X)`,`IMAGE (X :'a -> 'b) (p_space p)`]) PROB_REAL_SUM_IMAGE 1715 >> RW_TAC std_ss [EVENTS, IN_POW, SUBSET_REFL, PROB, SUBSET_DEF, IN_SING] 1716 >> METIS_TAC [PROB_UNIV, PROB, PSPACE]) 1717 >> ASM_SIMP_TAC std_ss [] 1718 >> STRIP_TAC 1719 >> ONCE_REWRITE_TAC [(UNDISCH o Q.ISPEC `(IMAGE (X :'a -> 'b) (p_space p))`) REAL_SUM_IMAGE_IN_IF] 1720 >> Q.ABBREV_TAC `foo = (& (CARD (IMAGE (X :'a -> 'b) (p_space p))))` 1721 >> `(\x. 1722 (if x IN IMAGE X (p_space p) then 1723 (\x. distribution p X {x} * ~logr b (distribution p X {x})) x 1724 else 1725 0)) = 1726 (\x. 1727 (if x IN IMAGE X (p_space p) then 1728 (\x. inv foo * ~logr b (inv foo)) x 1729 else 1730 0))` 1731 by METIS_TAC [] 1732 >> POP_ORW 1733 >> ASM_SIMP_TAC std_ss [GSYM REAL_SUM_IMAGE_IN_IF] 1734 >> (MP_TAC o Q.SPECL [`(\x. inv foo * ~ logr b (inv foo))`,`inv foo * ~ logr b (inv foo)`] o UNDISCH o 1735 Q.ISPEC `(IMAGE (X :'a -> 'b) (p_space p))`) REAL_SUM_IMAGE_FINITE_CONST 1736 >> RW_TAC std_ss [] 1737 >> POP_ASSUM (K ALL_TAC) 1738 >> `0 < foo` 1739 by (Q.UNABBREV_TAC `foo` >> MATCH_MP_TAC REAL_NZ_IMP_LT 1740 >> RW_TAC std_ss [(UNDISCH o Q.ISPEC `(IMAGE (X :'a -> 'b) (p_space p))`) CARD_EQ_0] 1741 >> SPOSE_NOT_THEN STRIP_ASSUME_TAC >> FULL_SIMP_TAC real_ss [REAL_SUM_IMAGE_THM]) 1742 >> `~logr b (inv foo) = logr b (inv (inv foo))` 1743 by (MATCH_MP_TAC neg_logr >> RW_TAC std_ss [REAL_LT_INV_EQ]) 1744 >> POP_ORW 1745 >> RW_TAC real_ss [REAL_INVINV, REAL_LT_IMP_NE, REAL_MUL_ASSOC, REAL_MUL_RINV]); 1746 1747val _ = export_theory (); 1748