1(* Title: HOL/Probability/Distribution_Functions.thy 2 Authors: Jeremy Avigad (CMU) and Luke Serafin (CMU) 3*) 4 5section \<open>Distribution Functions\<close> 6 7text \<open> 8Shows that the cumulative distribution function (cdf) of a distribution (a measure on the reals) is 9nondecreasing and right continuous, which tends to 0 and 1 in either direction. 10 11Conversely, every such function is the cdf of a unique distribution. This direction defines the 12measure in the obvious way on half-open intervals, and then applies the Caratheodory extension 13theorem. 14\<close> 15 16(* TODO: the locales "finite_borel_measure" and "real_distribution" are defined here, but maybe they 17 should be somewhere else. *) 18 19theory Distribution_Functions 20 imports Probability_Measure 21begin 22 23lemma UN_Ioc_eq_UNIV: "(\<Union>n. { -real n <.. real n}) = UNIV" 24 by auto 25 (metis le_less_trans minus_minus neg_less_iff_less not_le real_arch_simple 26 of_nat_0_le_iff reals_Archimedean2) 27 28subsection \<open>Properties of cdf's\<close> 29 30definition 31 cdf :: "real measure \<Rightarrow> real \<Rightarrow> real" 32where 33 "cdf M \<equiv> \<lambda>x. measure M {..x}" 34 35lemma cdf_def2: "cdf M x = measure M {..x}" 36 by (simp add: cdf_def) 37 38locale finite_borel_measure = finite_measure M for M :: "real measure" + 39 assumes M_is_borel: "sets M = sets borel" 40begin 41 42lemma sets_M[intro]: "a \<in> sets borel \<Longrightarrow> a \<in> sets M" 43 using M_is_borel by auto 44 45lemma cdf_diff_eq: 46 assumes "x < y" 47 shows "cdf M y - cdf M x = measure M {x<..y}" 48proof - 49 from assms have *: "{..x} \<union> {x<..y} = {..y}" by auto 50 have "measure M {..y} = measure M {..x} + measure M {x<..y}" 51 by (subst finite_measure_Union [symmetric], auto simp add: *) 52 thus ?thesis 53 unfolding cdf_def by auto 54qed 55 56lemma cdf_nondecreasing: "x \<le> y \<Longrightarrow> cdf M x \<le> cdf M y" 57 unfolding cdf_def by (auto intro!: finite_measure_mono) 58 59lemma borel_UNIV: "space M = UNIV" 60 by (metis in_mono sets.sets_into_space space_in_borel top_le M_is_borel) 61 62lemma cdf_nonneg: "cdf M x \<ge> 0" 63 unfolding cdf_def by (rule measure_nonneg) 64 65lemma cdf_bounded: "cdf M x \<le> measure M (space M)" 66 unfolding cdf_def by (intro bounded_measure) 67 68lemma cdf_lim_infty: 69 "((\<lambda>i. cdf M (real i)) \<longlonglongrightarrow> measure M (space M))" 70proof - 71 have "(\<lambda>i. cdf M (real i)) \<longlonglongrightarrow> measure M (\<Union> i::nat. {..real i})" 72 unfolding cdf_def by (rule finite_Lim_measure_incseq) (auto simp: incseq_def) 73 also have "(\<Union> i::nat. {..real i}) = space M" 74 by (auto simp: borel_UNIV intro: real_arch_simple) 75 finally show ?thesis . 76qed 77 78lemma cdf_lim_at_top: "(cdf M \<longlongrightarrow> measure M (space M)) at_top" 79 by (rule tendsto_at_topI_sequentially_real) 80 (simp_all add: mono_def cdf_nondecreasing cdf_lim_infty) 81 82lemma cdf_lim_neg_infty: "((\<lambda>i. cdf M (- real i)) \<longlonglongrightarrow> 0)" 83proof - 84 have "(\<lambda>i. cdf M (- real i)) \<longlonglongrightarrow> measure M (\<Inter> i::nat. {.. - real i })" 85 unfolding cdf_def by (rule finite_Lim_measure_decseq) (auto simp: decseq_def) 86 also have "(\<Inter> i::nat. {..- real i}) = {}" 87 by auto (metis leD le_minus_iff reals_Archimedean2) 88 finally show ?thesis 89 by simp 90qed 91 92lemma cdf_lim_at_bot: "(cdf M \<longlongrightarrow> 0) at_bot" 93proof - 94 have *: "((\<lambda>x :: real. - cdf M (- x)) \<longlongrightarrow> 0) at_top" 95 by (intro tendsto_at_topI_sequentially_real monoI) 96 (auto simp: cdf_nondecreasing cdf_lim_neg_infty tendsto_minus_cancel_left[symmetric]) 97 from filterlim_compose [OF *, OF filterlim_uminus_at_top_at_bot] 98 show ?thesis 99 unfolding tendsto_minus_cancel_left[symmetric] by simp 100qed 101 102lemma cdf_is_right_cont: "continuous (at_right a) (cdf M)" 103 unfolding continuous_within 104proof (rule tendsto_at_right_sequentially[where b="a + 1"]) 105 fix f :: "nat \<Rightarrow> real" and x assume f: "decseq f" "f \<longlonglongrightarrow> a" 106 then have "(\<lambda>n. cdf M (f n)) \<longlonglongrightarrow> measure M (\<Inter>i. {.. f i})" 107 using \<open>decseq f\<close> unfolding cdf_def 108 by (intro finite_Lim_measure_decseq) (auto simp: decseq_def) 109 also have "(\<Inter>i. {.. f i}) = {.. a}" 110 using decseq_ge[OF f] by (auto intro: order_trans LIMSEQ_le_const[OF f(2)]) 111 finally show "(\<lambda>n. cdf M (f n)) \<longlonglongrightarrow> cdf M a" 112 by (simp add: cdf_def) 113qed simp 114 115lemma cdf_at_left: "(cdf M \<longlongrightarrow> measure M {..<a}) (at_left a)" 116proof (rule tendsto_at_left_sequentially[of "a - 1"]) 117 fix f :: "nat \<Rightarrow> real" and x assume f: "incseq f" "f \<longlonglongrightarrow> a" "\<And>x. f x < a" "\<And>x. a - 1 < f x" 118 then have "(\<lambda>n. cdf M (f n)) \<longlonglongrightarrow> measure M (\<Union>i. {.. f i})" 119 using \<open>incseq f\<close> unfolding cdf_def 120 by (intro finite_Lim_measure_incseq) (auto simp: incseq_def) 121 also have "(\<Union>i. {.. f i}) = {..<a}" 122 by (auto dest!: order_tendstoD(1)[OF f(2)] eventually_happens'[OF sequentially_bot] 123 intro: less_imp_le le_less_trans f(3)) 124 finally show "(\<lambda>n. cdf M (f n)) \<longlonglongrightarrow> measure M {..<a}" 125 by (simp add: cdf_def) 126qed auto 127 128lemma isCont_cdf: "isCont (cdf M) x \<longleftrightarrow> measure M {x} = 0" 129proof - 130 have "isCont (cdf M) x \<longleftrightarrow> cdf M x = measure M {..<x}" 131 by (auto simp: continuous_at_split cdf_is_right_cont continuous_within[where s="{..< _}"] 132 cdf_at_left tendsto_unique[OF _ cdf_at_left]) 133 also have "cdf M x = measure M {..<x} \<longleftrightarrow> measure M {x} = 0" 134 unfolding cdf_def ivl_disj_un(2)[symmetric] 135 by (subst finite_measure_Union) auto 136 finally show ?thesis . 137qed 138 139lemma countable_atoms: "countable {x. measure M {x} > 0}" 140 using countable_support unfolding zero_less_measure_iff . 141 142end 143 144locale real_distribution = prob_space M for M :: "real measure" + 145 assumes events_eq_borel [simp, measurable_cong]: "sets M = sets borel" 146begin 147 148lemma finite_borel_measure_M: "finite_borel_measure M" 149 by standard auto 150 151sublocale finite_borel_measure M 152 by (rule finite_borel_measure_M) 153 154lemma space_eq_univ [simp]: "space M = UNIV" 155 using events_eq_borel[THEN sets_eq_imp_space_eq] by simp 156 157lemma cdf_bounded_prob: "\<And>x. cdf M x \<le> 1" 158 by (subst prob_space [symmetric], rule cdf_bounded) 159 160lemma cdf_lim_infty_prob: "(\<lambda>i. cdf M (real i)) \<longlonglongrightarrow> 1" 161 by (subst prob_space [symmetric], rule cdf_lim_infty) 162 163lemma cdf_lim_at_top_prob: "(cdf M \<longlongrightarrow> 1) at_top" 164 by (subst prob_space [symmetric], rule cdf_lim_at_top) 165 166lemma measurable_finite_borel [simp]: 167 "f \<in> borel_measurable borel \<Longrightarrow> f \<in> borel_measurable M" 168 by (rule borel_measurable_subalgebra[where N=borel]) auto 169 170end 171 172lemma (in prob_space) real_distribution_distr [intro, simp]: 173 "random_variable borel X \<Longrightarrow> real_distribution (distr M borel X)" 174 unfolding real_distribution_def real_distribution_axioms_def by (auto intro!: prob_space_distr) 175 176subsection \<open>Uniqueness\<close> 177 178lemma (in finite_borel_measure) emeasure_Ioc: 179 assumes "a \<le> b" shows "emeasure M {a <.. b} = cdf M b - cdf M a" 180proof - 181 have "{a <.. b} = {..b} - {..a}" 182 by auto 183 moreover have "{..x} \<in> sets M" for x 184 using atMost_borel[of x] M_is_borel by auto 185 moreover note \<open>a \<le> b\<close> 186 ultimately show ?thesis 187 by (simp add: emeasure_eq_measure finite_measure_Diff cdf_def) 188qed 189 190lemma cdf_unique': 191 fixes M1 M2 192 assumes "finite_borel_measure M1" and "finite_borel_measure M2" 193 assumes "cdf M1 = cdf M2" 194 shows "M1 = M2" 195proof (rule measure_eqI_generator_eq[where \<Omega>=UNIV]) 196 fix X assume "X \<in> range (\<lambda>(a, b). {a<..b::real})" 197 then obtain a b where Xeq: "X = {a<..b}" by auto 198 then show "emeasure M1 X = emeasure M2 X" 199 by (cases "a \<le> b") 200 (simp_all add: assms(1,2)[THEN finite_borel_measure.emeasure_Ioc] assms(3)) 201next 202 show "(\<Union>i. {- real (i::nat)<..real i}) = UNIV" 203 by (rule UN_Ioc_eq_UNIV) 204qed (auto simp: finite_borel_measure.emeasure_Ioc[OF assms(1)] 205 assms(1,2)[THEN finite_borel_measure.M_is_borel] borel_sigma_sets_Ioc 206 Int_stable_def) 207 208lemma cdf_unique: 209 "real_distribution M1 \<Longrightarrow> real_distribution M2 \<Longrightarrow> cdf M1 = cdf M2 \<Longrightarrow> M1 = M2" 210 using cdf_unique'[of M1 M2] by (simp add: real_distribution.finite_borel_measure_M) 211 212lemma 213 fixes F :: "real \<Rightarrow> real" 214 assumes nondecF : "\<And> x y. x \<le> y \<Longrightarrow> F x \<le> F y" 215 and right_cont_F : "\<And>a. continuous (at_right a) F" 216 and lim_F_at_bot : "(F \<longlongrightarrow> 0) at_bot" 217 and lim_F_at_top : "(F \<longlongrightarrow> m) at_top" 218 and m: "0 \<le> m" 219 shows interval_measure_UNIV: "emeasure (interval_measure F) UNIV = m" 220 and finite_borel_measure_interval_measure: "finite_borel_measure (interval_measure F)" 221proof - 222 let ?F = "interval_measure F" 223 { have "ennreal (m - 0) = (SUP i. ennreal (F (real i) - F (- real i)))" 224 by (intro LIMSEQ_unique[OF _ LIMSEQ_SUP] tendsto_ennrealI tendsto_intros 225 lim_F_at_bot[THEN filterlim_compose] lim_F_at_top[THEN filterlim_compose] 226 lim_F_at_bot[THEN filterlim_compose] filterlim_real_sequentially 227 filterlim_uminus_at_top[THEN iffD1]) 228 (auto simp: incseq_def nondecF intro!: diff_mono) 229 also have "\<dots> = (SUP i. emeasure ?F {- real i<..real i})" 230 by (subst emeasure_interval_measure_Ioc) (simp_all add: nondecF right_cont_F) 231 also have "\<dots> = emeasure ?F (\<Union>i::nat. {- real i<..real i})" 232 by (rule SUP_emeasure_incseq) (auto simp: incseq_def) 233 also have "(\<Union>i. {- real (i::nat)<..real i}) = space ?F" 234 by (simp add: UN_Ioc_eq_UNIV) 235 finally have "emeasure ?F (space ?F) = m" 236 by simp } 237 note * = this 238 then show "emeasure (interval_measure F) UNIV = m" 239 by simp 240 241 interpret finite_measure ?F 242 proof 243 show "emeasure ?F (space ?F) \<noteq> \<infinity>" 244 using * by simp 245 qed 246 show "finite_borel_measure (interval_measure F)" 247 proof qed simp_all 248qed 249 250lemma real_distribution_interval_measure: 251 fixes F :: "real \<Rightarrow> real" 252 assumes nondecF : "\<And> x y. x \<le> y \<Longrightarrow> F x \<le> F y" and 253 right_cont_F : "\<And>a. continuous (at_right a) F" and 254 lim_F_at_bot : "(F \<longlongrightarrow> 0) at_bot" and 255 lim_F_at_top : "(F \<longlongrightarrow> 1) at_top" 256 shows "real_distribution (interval_measure F)" 257proof - 258 let ?F = "interval_measure F" 259 interpret prob_space ?F 260 proof qed (use interval_measure_UNIV[OF assms] in simp) 261 show ?thesis 262 proof qed simp_all 263qed 264 265lemma 266 fixes F :: "real \<Rightarrow> real" 267 assumes nondecF : "\<And> x y. x \<le> y \<Longrightarrow> F x \<le> F y" and 268 right_cont_F : "\<And>a. continuous (at_right a) F" and 269 lim_F_at_bot : "(F \<longlongrightarrow> 0) at_bot" 270 shows emeasure_interval_measure_Iic: "emeasure (interval_measure F) {.. x} = F x" 271 and measure_interval_measure_Iic: "measure (interval_measure F) {.. x} = F x" 272 unfolding cdf_def 273proof - 274 have F_nonneg[simp]: "0 \<le> F y" for y 275 using lim_F_at_bot by (rule tendsto_upperbound) (auto simp: eventually_at_bot_linorder nondecF intro!: exI[of _ y]) 276 277 have "emeasure (interval_measure F) (\<Union>i::nat. {-real i <.. x}) = F x - ennreal 0" 278 proof (intro LIMSEQ_unique[OF Lim_emeasure_incseq]) 279 have "(\<lambda>i. F x - F (- real i)) \<longlonglongrightarrow> F x - 0" 280 by (intro tendsto_intros lim_F_at_bot[THEN filterlim_compose] filterlim_real_sequentially 281 filterlim_uminus_at_top[THEN iffD1]) 282 from tendsto_ennrealI[OF this] 283 show "(\<lambda>i. emeasure (interval_measure F) {- real i<..x}) \<longlonglongrightarrow> F x - ennreal 0" 284 apply (rule filterlim_cong[THEN iffD1, rotated 3]) 285 apply simp 286 apply simp 287 apply (rule eventually_sequentiallyI[where c="nat (ceiling (- x))"]) 288 apply (simp add: emeasure_interval_measure_Ioc right_cont_F nondecF) 289 done 290 qed (auto simp: incseq_def) 291 also have "(\<Union>i::nat. {-real i <.. x}) = {..x}" 292 by auto (metis minus_minus neg_less_iff_less reals_Archimedean2) 293 finally show "emeasure (interval_measure F) {..x} = F x" 294 by simp 295 then show "measure (interval_measure F) {..x} = F x" 296 by (simp add: measure_def) 297qed 298 299lemma cdf_interval_measure: 300 "(\<And> x y. x \<le> y \<Longrightarrow> F x \<le> F y) \<Longrightarrow> (\<And>a. continuous (at_right a) F) \<Longrightarrow> (F \<longlongrightarrow> 0) at_bot \<Longrightarrow> cdf (interval_measure F) = F" 301 by (simp add: cdf_def fun_eq_iff measure_interval_measure_Iic) 302 303end 304