1(*  Title:    HOL/Probability/Distribution_Functions.thy
2    Authors:  Jeremy Avigad (CMU) and Luke Serafin (CMU)
3*)
4
5section \<open>Distribution Functions\<close>
6
7text \<open>
8Shows that the cumulative distribution function (cdf) of a distribution (a measure on the reals) is
9nondecreasing and right continuous, which tends to 0 and 1 in either direction.
10
11Conversely, every such function is the cdf of a unique distribution. This direction defines the
12measure in the obvious way on half-open intervals, and then applies the Caratheodory extension
13theorem.
14\<close>
15
16(* TODO: the locales "finite_borel_measure" and "real_distribution" are defined here, but maybe they
17 should be somewhere else. *)
18
19theory Distribution_Functions
20  imports Probability_Measure
21begin
22
23lemma UN_Ioc_eq_UNIV: "(\<Union>n. { -real n <.. real n}) = UNIV"
24  by auto
25     (metis le_less_trans minus_minus neg_less_iff_less not_le real_arch_simple
26            of_nat_0_le_iff reals_Archimedean2)
27
28subsection \<open>Properties of cdf's\<close>
29
30definition
31  cdf :: "real measure \<Rightarrow> real \<Rightarrow> real"
32where
33  "cdf M \<equiv> \<lambda>x. measure M {..x}"
34
35lemma cdf_def2: "cdf M x = measure M {..x}"
36  by (simp add: cdf_def)
37
38locale finite_borel_measure = finite_measure M for M :: "real measure" +
39  assumes M_is_borel: "sets M = sets borel"
40begin
41
42lemma sets_M[intro]: "a \<in> sets borel \<Longrightarrow> a \<in> sets M"
43  using M_is_borel by auto
44
45lemma cdf_diff_eq:
46  assumes "x < y"
47  shows "cdf M y - cdf M x = measure M {x<..y}"
48proof -
49  from assms have *: "{..x} \<union> {x<..y} = {..y}" by auto
50  have "measure M {..y} = measure M {..x} + measure M {x<..y}"
51    by (subst finite_measure_Union [symmetric], auto simp add: *)
52  thus ?thesis
53    unfolding cdf_def by auto
54qed
55
56lemma cdf_nondecreasing: "x \<le> y \<Longrightarrow> cdf M x \<le> cdf M y"
57  unfolding cdf_def by (auto intro!: finite_measure_mono)
58
59lemma borel_UNIV: "space M = UNIV"
60 by (metis in_mono sets.sets_into_space space_in_borel top_le M_is_borel)
61
62lemma cdf_nonneg: "cdf M x \<ge> 0"
63  unfolding cdf_def by (rule measure_nonneg)
64
65lemma cdf_bounded: "cdf M x \<le> measure M (space M)"
66  unfolding cdf_def by (intro bounded_measure)
67
68lemma cdf_lim_infty:
69  "((\<lambda>i. cdf M (real i)) \<longlonglongrightarrow> measure M (space M))"
70proof -
71  have "(\<lambda>i. cdf M (real i)) \<longlonglongrightarrow> measure M (\<Union> i::nat. {..real i})"
72    unfolding cdf_def by (rule finite_Lim_measure_incseq) (auto simp: incseq_def)
73  also have "(\<Union> i::nat. {..real i}) = space M"
74    by (auto simp: borel_UNIV intro: real_arch_simple)
75  finally show ?thesis .
76qed
77
78lemma cdf_lim_at_top: "(cdf M \<longlongrightarrow> measure M (space M)) at_top"
79  by (rule tendsto_at_topI_sequentially_real)
80     (simp_all add: mono_def cdf_nondecreasing cdf_lim_infty)
81
82lemma cdf_lim_neg_infty: "((\<lambda>i. cdf M (- real i)) \<longlonglongrightarrow> 0)"
83proof -
84  have "(\<lambda>i. cdf M (- real i)) \<longlonglongrightarrow> measure M (\<Inter> i::nat. {.. - real i })"
85    unfolding cdf_def by (rule finite_Lim_measure_decseq) (auto simp: decseq_def)
86  also have "(\<Inter> i::nat. {..- real i}) = {}"
87    by auto (metis leD le_minus_iff reals_Archimedean2)
88  finally show ?thesis
89    by simp
90qed
91
92lemma cdf_lim_at_bot: "(cdf M \<longlongrightarrow> 0) at_bot"
93proof -
94  have *: "((\<lambda>x :: real. - cdf M (- x)) \<longlongrightarrow> 0) at_top"
95    by (intro tendsto_at_topI_sequentially_real monoI)
96       (auto simp: cdf_nondecreasing cdf_lim_neg_infty tendsto_minus_cancel_left[symmetric])
97  from filterlim_compose [OF *, OF filterlim_uminus_at_top_at_bot]
98  show ?thesis
99    unfolding tendsto_minus_cancel_left[symmetric] by simp
100qed
101
102lemma cdf_is_right_cont: "continuous (at_right a) (cdf M)"
103  unfolding continuous_within
104proof (rule tendsto_at_right_sequentially[where b="a + 1"])
105  fix f :: "nat \<Rightarrow> real" and x assume f: "decseq f" "f \<longlonglongrightarrow> a"
106  then have "(\<lambda>n. cdf M (f n)) \<longlonglongrightarrow> measure M (\<Inter>i. {.. f i})"
107    using \<open>decseq f\<close> unfolding cdf_def
108    by (intro finite_Lim_measure_decseq) (auto simp: decseq_def)
109  also have "(\<Inter>i. {.. f i}) = {.. a}"
110    using decseq_ge[OF f] by (auto intro: order_trans LIMSEQ_le_const[OF f(2)])
111  finally show "(\<lambda>n. cdf M (f n)) \<longlonglongrightarrow> cdf M a"
112    by (simp add: cdf_def)
113qed simp
114
115lemma cdf_at_left: "(cdf M \<longlongrightarrow> measure M {..<a}) (at_left a)"
116proof (rule tendsto_at_left_sequentially[of "a - 1"])
117  fix f :: "nat \<Rightarrow> real" and x assume f: "incseq f" "f \<longlonglongrightarrow> a" "\<And>x. f x < a" "\<And>x. a - 1 < f x"
118  then have "(\<lambda>n. cdf M (f n)) \<longlonglongrightarrow> measure M (\<Union>i. {.. f i})"
119    using \<open>incseq f\<close> unfolding cdf_def
120    by (intro finite_Lim_measure_incseq) (auto simp: incseq_def)
121  also have "(\<Union>i. {.. f i}) = {..<a}"
122    by (auto dest!: order_tendstoD(1)[OF f(2)] eventually_happens'[OF sequentially_bot]
123             intro: less_imp_le le_less_trans f(3))
124  finally show "(\<lambda>n. cdf M (f n)) \<longlonglongrightarrow> measure M {..<a}"
125    by (simp add: cdf_def)
126qed auto
127
128lemma isCont_cdf: "isCont (cdf M) x \<longleftrightarrow> measure M {x} = 0"
129proof -
130  have "isCont (cdf M) x \<longleftrightarrow> cdf M x = measure M {..<x}"
131    by (auto simp: continuous_at_split cdf_is_right_cont continuous_within[where s="{..< _}"]
132                   cdf_at_left tendsto_unique[OF _ cdf_at_left])
133  also have "cdf M x = measure M {..<x} \<longleftrightarrow> measure M {x} = 0"
134    unfolding cdf_def ivl_disj_un(2)[symmetric]
135    by (subst finite_measure_Union) auto
136  finally show ?thesis .
137qed
138
139lemma countable_atoms: "countable {x. measure M {x} > 0}"
140  using countable_support unfolding zero_less_measure_iff .
141
142end
143
144locale real_distribution = prob_space M for M :: "real measure" +
145  assumes events_eq_borel [simp, measurable_cong]: "sets M = sets borel"
146begin
147
148lemma finite_borel_measure_M: "finite_borel_measure M"
149  by standard auto
150
151sublocale finite_borel_measure M
152  by (rule finite_borel_measure_M)
153
154lemma space_eq_univ [simp]: "space M = UNIV"
155  using events_eq_borel[THEN sets_eq_imp_space_eq] by simp
156
157lemma cdf_bounded_prob: "\<And>x. cdf M x \<le> 1"
158  by (subst prob_space [symmetric], rule cdf_bounded)
159
160lemma cdf_lim_infty_prob: "(\<lambda>i. cdf M (real i)) \<longlonglongrightarrow> 1"
161  by (subst prob_space [symmetric], rule cdf_lim_infty)
162
163lemma cdf_lim_at_top_prob: "(cdf M \<longlongrightarrow> 1) at_top"
164  by (subst prob_space [symmetric], rule cdf_lim_at_top)
165
166lemma measurable_finite_borel [simp]:
167  "f \<in> borel_measurable borel \<Longrightarrow> f \<in> borel_measurable M"
168  by (rule borel_measurable_subalgebra[where N=borel]) auto
169
170end
171
172lemma (in prob_space) real_distribution_distr [intro, simp]:
173  "random_variable borel X \<Longrightarrow> real_distribution (distr M borel X)"
174  unfolding real_distribution_def real_distribution_axioms_def by (auto intro!: prob_space_distr)
175
176subsection \<open>Uniqueness\<close>
177
178lemma (in finite_borel_measure) emeasure_Ioc:
179  assumes "a \<le> b" shows "emeasure M {a <.. b} = cdf M b - cdf M a"
180proof -
181  have "{a <.. b} = {..b} - {..a}"
182    by auto
183  moreover have "{..x} \<in> sets M" for x
184    using atMost_borel[of x] M_is_borel by auto
185  moreover note \<open>a \<le> b\<close>
186  ultimately show ?thesis
187    by (simp add: emeasure_eq_measure finite_measure_Diff cdf_def)
188qed
189
190lemma cdf_unique':
191  fixes M1 M2
192  assumes "finite_borel_measure M1" and "finite_borel_measure M2"
193  assumes "cdf M1 = cdf M2"
194  shows "M1 = M2"
195proof (rule measure_eqI_generator_eq[where \<Omega>=UNIV])
196  fix X assume "X \<in> range (\<lambda>(a, b). {a<..b::real})"
197  then obtain a b where Xeq: "X = {a<..b}" by auto
198  then show "emeasure M1 X = emeasure M2 X"
199    by (cases "a \<le> b")
200       (simp_all add: assms(1,2)[THEN finite_borel_measure.emeasure_Ioc] assms(3))
201next
202  show "(\<Union>i. {- real (i::nat)<..real i}) = UNIV"
203    by (rule UN_Ioc_eq_UNIV)
204qed (auto simp: finite_borel_measure.emeasure_Ioc[OF assms(1)]
205  assms(1,2)[THEN finite_borel_measure.M_is_borel] borel_sigma_sets_Ioc
206  Int_stable_def)
207
208lemma cdf_unique:
209  "real_distribution M1 \<Longrightarrow> real_distribution M2 \<Longrightarrow> cdf M1 = cdf M2 \<Longrightarrow> M1 = M2"
210  using cdf_unique'[of M1 M2] by (simp add: real_distribution.finite_borel_measure_M)
211
212lemma
213  fixes F :: "real \<Rightarrow> real"
214  assumes nondecF : "\<And> x y. x \<le> y \<Longrightarrow> F x \<le> F y"
215    and right_cont_F : "\<And>a. continuous (at_right a) F"
216    and lim_F_at_bot : "(F \<longlongrightarrow> 0) at_bot"
217    and lim_F_at_top : "(F \<longlongrightarrow> m) at_top"
218    and m: "0 \<le> m"
219  shows interval_measure_UNIV: "emeasure (interval_measure F) UNIV = m"
220    and finite_borel_measure_interval_measure: "finite_borel_measure (interval_measure F)"
221proof -
222  let ?F = "interval_measure F"
223  { have "ennreal (m - 0) = (SUP i. ennreal (F (real i) - F (- real i)))"
224      by (intro LIMSEQ_unique[OF _ LIMSEQ_SUP] tendsto_ennrealI tendsto_intros
225                lim_F_at_bot[THEN filterlim_compose] lim_F_at_top[THEN filterlim_compose]
226                lim_F_at_bot[THEN filterlim_compose] filterlim_real_sequentially
227                filterlim_uminus_at_top[THEN iffD1])
228         (auto simp: incseq_def nondecF intro!: diff_mono)
229    also have "\<dots> = (SUP i. emeasure ?F {- real i<..real i})"
230      by (subst emeasure_interval_measure_Ioc) (simp_all add: nondecF right_cont_F)
231    also have "\<dots> = emeasure ?F (\<Union>i::nat. {- real i<..real i})"
232      by (rule SUP_emeasure_incseq) (auto simp: incseq_def)
233    also have "(\<Union>i. {- real (i::nat)<..real i}) = space ?F"
234      by (simp add: UN_Ioc_eq_UNIV)
235    finally have "emeasure ?F (space ?F) = m"
236      by simp }
237  note * = this
238  then show "emeasure (interval_measure F) UNIV = m"
239    by simp
240
241  interpret finite_measure ?F
242  proof
243    show "emeasure ?F (space ?F) \<noteq> \<infinity>"
244      using * by simp
245  qed
246  show "finite_borel_measure (interval_measure F)"
247    proof qed simp_all
248qed
249
250lemma real_distribution_interval_measure:
251  fixes F :: "real \<Rightarrow> real"
252  assumes nondecF : "\<And> x y. x \<le> y \<Longrightarrow> F x \<le> F y" and
253    right_cont_F : "\<And>a. continuous (at_right a) F" and
254    lim_F_at_bot : "(F \<longlongrightarrow> 0) at_bot" and
255    lim_F_at_top : "(F \<longlongrightarrow> 1) at_top"
256  shows "real_distribution (interval_measure F)"
257proof -
258  let ?F = "interval_measure F"
259  interpret prob_space ?F
260    proof qed (use interval_measure_UNIV[OF assms] in simp)
261  show ?thesis
262    proof qed simp_all
263qed
264
265lemma
266  fixes F :: "real \<Rightarrow> real"
267  assumes nondecF : "\<And> x y. x \<le> y \<Longrightarrow> F x \<le> F y" and
268    right_cont_F : "\<And>a. continuous (at_right a) F" and
269    lim_F_at_bot : "(F \<longlongrightarrow> 0) at_bot"
270  shows emeasure_interval_measure_Iic: "emeasure (interval_measure F) {.. x} = F x"
271    and measure_interval_measure_Iic: "measure (interval_measure F) {.. x} = F x"
272  unfolding cdf_def
273proof -
274  have F_nonneg[simp]: "0 \<le> F y" for y
275    using lim_F_at_bot by (rule tendsto_upperbound) (auto simp: eventually_at_bot_linorder nondecF intro!: exI[of _ y])
276
277  have "emeasure (interval_measure F) (\<Union>i::nat. {-real i <.. x}) = F x - ennreal 0"
278  proof (intro LIMSEQ_unique[OF Lim_emeasure_incseq])
279    have "(\<lambda>i. F x - F (- real i)) \<longlonglongrightarrow> F x - 0"
280      by (intro tendsto_intros lim_F_at_bot[THEN filterlim_compose] filterlim_real_sequentially
281                filterlim_uminus_at_top[THEN iffD1])
282    from tendsto_ennrealI[OF this]
283    show "(\<lambda>i. emeasure (interval_measure F) {- real i<..x}) \<longlonglongrightarrow> F x - ennreal 0"
284      apply (rule filterlim_cong[THEN iffD1, rotated 3])
285        apply simp
286       apply simp
287      apply (rule eventually_sequentiallyI[where c="nat (ceiling (- x))"])
288      apply (simp add: emeasure_interval_measure_Ioc right_cont_F nondecF)
289      done
290  qed (auto simp: incseq_def)
291  also have "(\<Union>i::nat. {-real i <.. x}) = {..x}"
292    by auto (metis minus_minus neg_less_iff_less reals_Archimedean2)
293  finally show "emeasure (interval_measure F) {..x} = F x"
294    by simp
295  then show "measure (interval_measure F) {..x} = F x"
296    by (simp add: measure_def)
297qed
298
299lemma cdf_interval_measure:
300  "(\<And> x y. x \<le> y \<Longrightarrow> F x \<le> F y) \<Longrightarrow> (\<And>a. continuous (at_right a) F) \<Longrightarrow> (F \<longlongrightarrow> 0) at_bot \<Longrightarrow> cdf (interval_measure F) = F"
301  by (simp add: cdf_def fun_eq_iff measure_interval_measure_Iic)
302
303end
304