1(* Title: HOL/Probability/Central_Limit_Theorem.thy 2 Authors: Jeremy Avigad (CMU), Luke Serafin (CMU) 3*) 4 5section \<open>The Central Limit Theorem\<close> 6 7theory Central_Limit_Theorem 8 imports Levy 9begin 10 11theorem (in prob_space) central_limit_theorem_zero_mean: 12 fixes X :: "nat \<Rightarrow> 'a \<Rightarrow> real" 13 and \<mu> :: "real measure" 14 and \<sigma> :: real 15 and S :: "nat \<Rightarrow> 'a \<Rightarrow> real" 16 assumes X_indep: "indep_vars (\<lambda>i. borel) X UNIV" 17 and X_integrable: "\<And>n. integrable M (X n)" 18 and X_mean_0: "\<And>n. expectation (X n) = 0" 19 and \<sigma>_pos: "\<sigma> > 0" 20 and X_square_integrable: "\<And>n. integrable M (\<lambda>x. (X n x)\<^sup>2)" 21 and X_variance: "\<And>n. variance (X n) = \<sigma>\<^sup>2" 22 and X_distrib: "\<And>n. distr M borel (X n) = \<mu>" 23 defines "S n \<equiv> \<lambda>x. \<Sum>i<n. X i x" 24 shows "weak_conv_m (\<lambda>n. distr M borel (\<lambda>x. S n x / sqrt (n * \<sigma>\<^sup>2))) std_normal_distribution" 25proof - 26 let ?S' = "\<lambda>n x. S n x / sqrt (real n * \<sigma>\<^sup>2)" 27 define \<phi> where "\<phi> n = char (distr M borel (?S' n))" for n 28 define \<psi> where "\<psi> n t = char \<mu> (t / sqrt (\<sigma>\<^sup>2 * n))" for n t 29 30 have X_rv [simp, measurable]: "\<And>n. random_variable borel (X n)" 31 using X_indep unfolding indep_vars_def2 by simp 32 interpret \<mu>: real_distribution \<mu> 33 by (subst X_distrib [symmetric, of 0], rule real_distribution_distr, simp) 34 35 (* these are equivalent to the hypotheses on X, given X_distr *) 36 have \<mu>_integrable [simp]: "integrable \<mu> (\<lambda>x. x)" 37 and \<mu>_mean_integrable [simp]: "\<mu>.expectation (\<lambda>x. x) = 0" 38 and \<mu>_square_integrable [simp]: "integrable \<mu> (\<lambda>x. x^2)" 39 and \<mu>_variance [simp]: "\<mu>.expectation (\<lambda>x. x^2) = \<sigma>\<^sup>2" 40 using assms by (simp_all add: X_distrib [symmetric, of 0] integrable_distr_eq integral_distr) 41 42 have main: "\<forall>\<^sub>F n in sequentially. 43 cmod (\<phi> n t - (1 + (-(t^2) / 2) / n)^n) \<le> 44 t\<^sup>2 / (6 * \<sigma>\<^sup>2) * (LINT x|\<mu>. min (6 * x\<^sup>2) (\<bar>t / sqrt (\<sigma>\<^sup>2 * n)\<bar> * \<bar>x\<bar> ^ 3))" for t 45 proof (rule eventually_sequentiallyI) 46 fix n :: nat 47 assume "n \<ge> nat (ceiling (t^2 / 4))" 48 hence n: "n \<ge> t^2 / 4" by (subst nat_ceiling_le_eq [symmetric]) 49 let ?t = "t / sqrt (\<sigma>\<^sup>2 * n)" 50 51 define \<psi>' where "\<psi>' n i = char (distr M borel (\<lambda>x. X i x / sqrt (\<sigma>\<^sup>2 * n)))" for n i 52 have *: "\<And>n i t. \<psi>' n i t = \<psi> n t" 53 unfolding \<psi>_def \<psi>'_def char_def 54 by (subst X_distrib [symmetric]) (auto simp: integral_distr) 55 56 have "\<phi> n t = char (distr M borel (\<lambda>x. \<Sum>i<n. X i x / sqrt (\<sigma>\<^sup>2 * real n))) t" 57 by (auto simp: \<phi>_def S_def sum_divide_distrib ac_simps) 58 also have "\<dots> = (\<Prod> i < n. \<psi>' n i t)" 59 unfolding \<psi>'_def 60 apply (rule char_distr_sum) 61 apply (rule indep_vars_compose2[where X=X]) 62 apply (rule indep_vars_subset) 63 apply (rule X_indep) 64 apply auto 65 done 66 also have "\<dots> = (\<psi> n t)^n" 67 by (auto simp add: * prod_constant) 68 finally have \<phi>_eq: "\<phi> n t =(\<psi> n t)^n" . 69 70 have "norm (\<psi> n t - (1 - ?t^2 * \<sigma>\<^sup>2 / 2)) \<le> ?t\<^sup>2 / 6 * (LINT x|\<mu>. min (6 * x\<^sup>2) (\<bar>?t\<bar> * \<bar>x\<bar> ^ 3))" 71 unfolding \<psi>_def by (rule \<mu>.char_approx3, auto) 72 also have "?t^2 * \<sigma>\<^sup>2 = t^2 / n" 73 using \<sigma>_pos by (simp add: power_divide) 74 also have "t^2 / n / 2 = (t^2 / 2) / n" 75 by simp 76 finally have **: "norm (\<psi> n t - (1 + (-(t^2) / 2) / n)) \<le> 77 ?t\<^sup>2 / 6 * (LINT x|\<mu>. min (6 * x\<^sup>2) (\<bar>?t\<bar> * \<bar>x\<bar> ^ 3))" by simp 78 79 have "norm (\<phi> n t - (complex_of_real (1 + (-(t^2) / 2) / n))^n) \<le> 80 n * norm (\<psi> n t - (complex_of_real (1 + (-(t^2) / 2) / n)))" 81 using n 82 by (auto intro!: norm_power_diff \<mu>.cmod_char_le_1 abs_leI 83 simp del: of_real_diff simp: of_real_diff[symmetric] divide_le_eq \<phi>_eq \<psi>_def) 84 also have "\<dots> \<le> n * (?t\<^sup>2 / 6 * (LINT x|\<mu>. min (6 * x\<^sup>2) (\<bar>?t\<bar> * \<bar>x\<bar> ^ 3)))" 85 by (rule mult_left_mono [OF **], simp) 86 also have "\<dots> = (t\<^sup>2 / (6 * \<sigma>\<^sup>2) * (LINT x|\<mu>. min (6 * x\<^sup>2) (\<bar>?t\<bar> * \<bar>x\<bar> ^ 3)))" 87 using \<sigma>_pos by (simp add: field_simps min_absorb2) 88 finally show "norm (\<phi> n t - (1 + (-(t^2) / 2) / n)^n) \<le> 89 (t\<^sup>2 / (6 * \<sigma>\<^sup>2) * (LINT x|\<mu>. min (6 * x\<^sup>2) (\<bar>?t\<bar> * \<bar>x\<bar> ^ 3)))" 90 by simp 91 qed 92 93 show ?thesis 94 proof (rule levy_continuity) 95 fix t 96 let ?t = "\<lambda>n. t / sqrt (\<sigma>\<^sup>2 * n)" 97 have "\<And>x. (\<lambda>n. min (6 * x\<^sup>2) (\<bar>t\<bar> * \<bar>x\<bar> ^ 3 / \<bar>sqrt (\<sigma>\<^sup>2 * real n)\<bar>)) \<longlonglongrightarrow> 0" 98 using \<sigma>_pos 99 by (auto simp: real_sqrt_mult min_absorb2 100 intro!: tendsto_min[THEN tendsto_eq_rhs] sqrt_at_top[THEN filterlim_compose] 101 filterlim_tendsto_pos_mult_at_top filterlim_at_top_imp_at_infinity 102 tendsto_divide_0 filterlim_real_sequentially) 103 then have "(\<lambda>n. LINT x|\<mu>. min (6 * x\<^sup>2) (\<bar>?t n\<bar> * \<bar>x\<bar> ^ 3)) \<longlonglongrightarrow> (LINT x|\<mu>. 0)" 104 by (intro integral_dominated_convergence [where w = "\<lambda>x. 6 * x^2"]) auto 105 then have *: "(\<lambda>n. t\<^sup>2 / (6 * \<sigma>\<^sup>2) * (LINT x|\<mu>. min (6 * x\<^sup>2) (\<bar>?t n\<bar> * \<bar>x\<bar> ^ 3))) \<longlonglongrightarrow> 0" 106 by (simp only: integral_zero tendsto_mult_right_zero) 107 108 have "(\<lambda>n. complex_of_real ((1 + (-(t^2) / 2) / n)^n)) \<longlonglongrightarrow> complex_of_real (exp (-(t^2) / 2))" 109 by (rule isCont_tendsto_compose [OF _ tendsto_exp_limit_sequentially]) auto 110 then have "(\<lambda>n. \<phi> n t) \<longlonglongrightarrow> complex_of_real (exp (-(t^2) / 2))" 111 by (rule Lim_transform) (rule Lim_null_comparison [OF main *]) 112 then show "(\<lambda>n. char (distr M borel (?S' n)) t) \<longlonglongrightarrow> char std_normal_distribution t" 113 by (simp add: \<phi>_def char_std_normal_distribution) 114 qed (auto intro!: real_dist_normal_dist simp: S_def) 115qed 116 117theorem (in prob_space) central_limit_theorem: 118 fixes X :: "nat \<Rightarrow> 'a \<Rightarrow> real" 119 and \<mu> :: "real measure" 120 and \<sigma> :: real 121 and S :: "nat \<Rightarrow> 'a \<Rightarrow> real" 122 assumes X_indep: "indep_vars (\<lambda>i. borel) X UNIV" 123 and X_integrable: "\<And>n. integrable M (X n)" 124 and X_mean: "\<And>n. expectation (X n) = m" 125 and \<sigma>_pos: "\<sigma> > 0" 126 and X_square_integrable: "\<And>n. integrable M (\<lambda>x. (X n x)\<^sup>2)" 127 and X_variance: "\<And>n. variance (X n) = \<sigma>\<^sup>2" 128 and X_distrib: "\<And>n. distr M borel (X n) = \<mu>" 129 defines "X' i x \<equiv> X i x - m" 130 shows "weak_conv_m (\<lambda>n. distr M borel (\<lambda>x. (\<Sum>i<n. X' i x) / sqrt (n*\<sigma>\<^sup>2))) std_normal_distribution" 131proof (intro central_limit_theorem_zero_mean) 132 show "indep_vars (\<lambda>i. borel) X' UNIV" 133 unfolding X'_def[abs_def] using X_indep by (rule indep_vars_compose2) auto 134 show "integrable M (X' n)" "expectation (X' n) = 0" for n 135 using X_integrable X_mean by (auto simp: X'_def[abs_def] prob_space) 136 show "\<sigma> > 0" "integrable M (\<lambda>x. (X' n x)\<^sup>2)" "variance (X' n) = \<sigma>\<^sup>2" for n 137 using \<open>0 < \<sigma>\<close> X_integrable X_mean X_square_integrable X_variance unfolding X'_def 138 by (auto simp: prob_space power2_diff) 139 show "distr M borel (X' n) = distr \<mu> borel (\<lambda>x. x - m)" for n 140 unfolding X_distrib[of n, symmetric] using X_integrable 141 by (subst distr_distr) (auto simp: X'_def[abs_def] comp_def) 142qed 143 144end 145