1(* 2Author: Tobias Nipkow, Daniel St��we 3Largely derived from AFP entry AVL. 4*) 5 6section "AVL Tree Implementation of Sets" 7 8theory AVL_Set 9imports 10 Cmp 11 Isin2 12 "HOL-Number_Theory.Fib" 13begin 14 15type_synonym 'a avl_tree = "('a,nat) tree" 16 17definition empty :: "'a avl_tree" where 18"empty = Leaf" 19 20text \<open>Invariant:\<close> 21 22fun avl :: "'a avl_tree \<Rightarrow> bool" where 23"avl Leaf = True" | 24"avl (Node l a h r) = 25 ((height l = height r \<or> height l = height r + 1 \<or> height r = height l + 1) \<and> 26 h = max (height l) (height r) + 1 \<and> avl l \<and> avl r)" 27 28fun ht :: "'a avl_tree \<Rightarrow> nat" where 29"ht Leaf = 0" | 30"ht (Node l a h r) = h" 31 32definition node :: "'a avl_tree \<Rightarrow> 'a \<Rightarrow> 'a avl_tree \<Rightarrow> 'a avl_tree" where 33"node l a r = Node l a (max (ht l) (ht r) + 1) r" 34 35definition balL :: "'a avl_tree \<Rightarrow> 'a \<Rightarrow> 'a avl_tree \<Rightarrow> 'a avl_tree" where 36"balL l a r = 37 (if ht l = ht r + 2 then 38 case l of 39 Node bl b _ br \<Rightarrow> 40 if ht bl < ht br then 41 case br of 42 Node cl c _ cr \<Rightarrow> node (node bl b cl) c (node cr a r) 43 else node bl b (node br a r) 44 else node l a r)" 45 46definition balR :: "'a avl_tree \<Rightarrow> 'a \<Rightarrow> 'a avl_tree \<Rightarrow> 'a avl_tree" where 47"balR l a r = 48 (if ht r = ht l + 2 then 49 case r of 50 Node bl b _ br \<Rightarrow> 51 if ht bl > ht br then 52 case bl of 53 Node cl c _ cr \<Rightarrow> node (node l a cl) c (node cr b br) 54 else node (node l a bl) b br 55 else node l a r)" 56 57fun insert :: "'a::linorder \<Rightarrow> 'a avl_tree \<Rightarrow> 'a avl_tree" where 58"insert x Leaf = Node Leaf x 1 Leaf" | 59"insert x (Node l a h r) = (case cmp x a of 60 EQ \<Rightarrow> Node l a h r | 61 LT \<Rightarrow> balL (insert x l) a r | 62 GT \<Rightarrow> balR l a (insert x r))" 63 64fun split_max :: "'a avl_tree \<Rightarrow> 'a avl_tree * 'a" where 65"split_max (Node l a _ r) = 66 (if r = Leaf then (l,a) else let (r',a') = split_max r in (balL l a r', a'))" 67 68lemmas split_max_induct = split_max.induct[case_names Node Leaf] 69 70fun del_root :: "'a avl_tree \<Rightarrow> 'a avl_tree" where 71"del_root (Node Leaf a h r) = r" | 72"del_root (Node l a h Leaf) = l" | 73"del_root (Node l a h r) = (let (l', a') = split_max l in balR l' a' r)" 74 75lemmas del_root_cases = del_root.cases[case_names Leaf_t Node_Leaf Node_Node] 76 77fun delete :: "'a::linorder \<Rightarrow> 'a avl_tree \<Rightarrow> 'a avl_tree" where 78"delete _ Leaf = Leaf" | 79"delete x (Node l a h r) = 80 (case cmp x a of 81 EQ \<Rightarrow> del_root (Node l a h r) | 82 LT \<Rightarrow> balR (delete x l) a r | 83 GT \<Rightarrow> balL l a (delete x r))" 84 85 86subsection \<open>Functional Correctness Proofs\<close> 87 88text\<open>Very different from the AFP/AVL proofs\<close> 89 90 91subsubsection "Proofs for insert" 92 93lemma inorder_balL: 94 "inorder (balL l a r) = inorder l @ a # inorder r" 95by (auto simp: node_def balL_def split:tree.splits) 96 97lemma inorder_balR: 98 "inorder (balR l a r) = inorder l @ a # inorder r" 99by (auto simp: node_def balR_def split:tree.splits) 100 101theorem inorder_insert: 102 "sorted(inorder t) \<Longrightarrow> inorder(insert x t) = ins_list x (inorder t)" 103by (induct t) 104 (auto simp: ins_list_simps inorder_balL inorder_balR) 105 106 107subsubsection "Proofs for delete" 108 109lemma inorder_split_maxD: 110 "\<lbrakk> split_max t = (t',a); t \<noteq> Leaf \<rbrakk> \<Longrightarrow> 111 inorder t' @ [a] = inorder t" 112by(induction t arbitrary: t' rule: split_max.induct) 113 (auto simp: inorder_balL split: if_splits prod.splits tree.split) 114 115lemma inorder_del_root: 116 "inorder (del_root (Node l a h r)) = inorder l @ inorder r" 117by(cases "Node l a h r" rule: del_root.cases) 118 (auto simp: inorder_balL inorder_balR inorder_split_maxD split: if_splits prod.splits) 119 120theorem inorder_delete: 121 "sorted(inorder t) \<Longrightarrow> inorder (delete x t) = del_list x (inorder t)" 122by(induction t) 123 (auto simp: del_list_simps inorder_balL inorder_balR 124 inorder_del_root inorder_split_maxD split: prod.splits) 125 126 127subsection \<open>AVL invariants\<close> 128 129text\<open>Essentially the AFP/AVL proofs\<close> 130 131 132subsubsection \<open>Insertion maintains AVL balance\<close> 133 134declare Let_def [simp] 135 136lemma [simp]: "avl t \<Longrightarrow> ht t = height t" 137by (induct t) simp_all 138 139lemma height_balL: 140 "\<lbrakk> height l = height r + 2; avl l; avl r \<rbrakk> \<Longrightarrow> 141 height (balL l a r) = height r + 2 \<or> 142 height (balL l a r) = height r + 3" 143by (cases l) (auto simp:node_def balL_def split:tree.split) 144 145lemma height_balR: 146 "\<lbrakk> height r = height l + 2; avl l; avl r \<rbrakk> \<Longrightarrow> 147 height (balR l a r) = height l + 2 \<or> 148 height (balR l a r) = height l + 3" 149by (cases r) (auto simp add:node_def balR_def split:tree.split) 150 151lemma [simp]: "height(node l a r) = max (height l) (height r) + 1" 152by (simp add: node_def) 153 154lemma avl_node: 155 "\<lbrakk> avl l; avl r; 156 height l = height r \<or> height l = height r + 1 \<or> height r = height l + 1 157 \<rbrakk> \<Longrightarrow> avl(node l a r)" 158by (auto simp add:max_def node_def) 159 160lemma height_balL2: 161 "\<lbrakk> avl l; avl r; height l \<noteq> height r + 2 \<rbrakk> \<Longrightarrow> 162 height (balL l a r) = (1 + max (height l) (height r))" 163by (cases l, cases r) (simp_all add: balL_def) 164 165lemma height_balR2: 166 "\<lbrakk> avl l; avl r; height r \<noteq> height l + 2 \<rbrakk> \<Longrightarrow> 167 height (balR l a r) = (1 + max (height l) (height r))" 168by (cases l, cases r) (simp_all add: balR_def) 169 170lemma avl_balL: 171 assumes "avl l" "avl r" and "height l = height r \<or> height l = height r + 1 172 \<or> height r = height l + 1 \<or> height l = height r + 2" 173 shows "avl(balL l a r)" 174proof(cases l) 175 case Leaf 176 with assms show ?thesis by (simp add: node_def balL_def) 177next 178 case Node 179 with assms show ?thesis 180 proof(cases "height l = height r + 2") 181 case True 182 from True Node assms show ?thesis 183 by (auto simp: balL_def intro!: avl_node split: tree.split) arith+ 184 next 185 case False 186 with assms show ?thesis by (simp add: avl_node balL_def) 187 qed 188qed 189 190lemma avl_balR: 191 assumes "avl l" and "avl r" and "height l = height r \<or> height l = height r + 1 192 \<or> height r = height l + 1 \<or> height r = height l + 2" 193 shows "avl(balR l a r)" 194proof(cases r) 195 case Leaf 196 with assms show ?thesis by (simp add: node_def balR_def) 197next 198 case Node 199 with assms show ?thesis 200 proof(cases "height r = height l + 2") 201 case True 202 from True Node assms show ?thesis 203 by (auto simp: balR_def intro!: avl_node split: tree.split) arith+ 204 next 205 case False 206 with assms show ?thesis by (simp add: balR_def avl_node) 207 qed 208qed 209 210(* It appears that these two properties need to be proved simultaneously: *) 211 212text\<open>Insertion maintains the AVL property:\<close> 213 214theorem avl_insert: 215 assumes "avl t" 216 shows "avl(insert x t)" 217 "(height (insert x t) = height t \<or> height (insert x t) = height t + 1)" 218using assms 219proof (induction t) 220 case (Node l a h r) 221 case 1 222 show ?case 223 proof(cases "x = a") 224 case True with Node 1 show ?thesis by simp 225 next 226 case False 227 show ?thesis 228 proof(cases "x<a") 229 case True with Node 1 show ?thesis by (auto simp add:avl_balL) 230 next 231 case False with Node 1 \<open>x\<noteq>a\<close> show ?thesis by (auto simp add:avl_balR) 232 qed 233 qed 234 case 2 235 show ?case 236 proof(cases "x = a") 237 case True with Node 1 show ?thesis by simp 238 next 239 case False 240 show ?thesis 241 proof(cases "x<a") 242 case True 243 show ?thesis 244 proof(cases "height (insert x l) = height r + 2") 245 case False with Node 2 \<open>x < a\<close> show ?thesis by (auto simp: height_balL2) 246 next 247 case True 248 hence "(height (balL (insert x l) a r) = height r + 2) \<or> 249 (height (balL (insert x l) a r) = height r + 3)" (is "?A \<or> ?B") 250 using Node 2 by (intro height_balL) simp_all 251 thus ?thesis 252 proof 253 assume ?A with 2 \<open>x < a\<close> show ?thesis by (auto) 254 next 255 assume ?B with True 1 Node(2) \<open>x < a\<close> show ?thesis by (simp) arith 256 qed 257 qed 258 next 259 case False 260 show ?thesis 261 proof(cases "height (insert x r) = height l + 2") 262 case False with Node 2 \<open>\<not>x < a\<close> show ?thesis by (auto simp: height_balR2) 263 next 264 case True 265 hence "(height (balR l a (insert x r)) = height l + 2) \<or> 266 (height (balR l a (insert x r)) = height l + 3)" (is "?A \<or> ?B") 267 using Node 2 by (intro height_balR) simp_all 268 thus ?thesis 269 proof 270 assume ?A with 2 \<open>\<not>x < a\<close> show ?thesis by (auto) 271 next 272 assume ?B with True 1 Node(4) \<open>\<not>x < a\<close> show ?thesis by (simp) arith 273 qed 274 qed 275 qed 276 qed 277qed simp_all 278 279 280subsubsection \<open>Deletion maintains AVL balance\<close> 281 282lemma avl_split_max: 283 assumes "avl x" and "x \<noteq> Leaf" 284 shows "avl (fst (split_max x))" "height x = height(fst (split_max x)) \<or> 285 height x = height(fst (split_max x)) + 1" 286using assms 287proof (induct x rule: split_max_induct) 288 case (Node l a h r) 289 case 1 290 thus ?case using Node 291 by (auto simp: height_balL height_balL2 avl_balL split:prod.split) 292next 293 case (Node l a h r) 294 case 2 295 let ?r' = "fst (split_max r)" 296 from \<open>avl x\<close> Node 2 have "avl l" and "avl r" by simp_all 297 thus ?case using Node 2 height_balL[of l ?r' a] height_balL2[of l ?r' a] 298 apply (auto split:prod.splits simp del:avl.simps) by arith+ 299qed auto 300 301lemma avl_del_root: 302 assumes "avl t" and "t \<noteq> Leaf" 303 shows "avl(del_root t)" 304using assms 305proof (cases t rule:del_root_cases) 306 case (Node_Node ll ln lh lr n h rl rn rh rr) 307 let ?l = "Node ll ln lh lr" 308 let ?r = "Node rl rn rh rr" 309 let ?l' = "fst (split_max ?l)" 310 from \<open>avl t\<close> and Node_Node have "avl ?r" by simp 311 from \<open>avl t\<close> and Node_Node have "avl ?l" by simp 312 hence "avl(?l')" "height ?l = height(?l') \<or> 313 height ?l = height(?l') + 1" by (rule avl_split_max,simp)+ 314 with \<open>avl t\<close> Node_Node have "height ?l' = height ?r \<or> height ?l' = height ?r + 1 315 \<or> height ?r = height ?l' + 1 \<or> height ?r = height ?l' + 2" by fastforce 316 with \<open>avl ?l'\<close> \<open>avl ?r\<close> have "avl(balR ?l' (snd(split_max ?l)) ?r)" 317 by (rule avl_balR) 318 with Node_Node show ?thesis by (auto split:prod.splits) 319qed simp_all 320 321lemma height_del_root: 322 assumes "avl t" and "t \<noteq> Leaf" 323 shows "height t = height(del_root t) \<or> height t = height(del_root t) + 1" 324using assms 325proof (cases t rule: del_root_cases) 326 case (Node_Node ll ln lh lr n h rl rn rh rr) 327 let ?l = "Node ll ln lh lr" 328 let ?r = "Node rl rn rh rr" 329 let ?l' = "fst (split_max ?l)" 330 let ?t' = "balR ?l' (snd(split_max ?l)) ?r" 331 from \<open>avl t\<close> and Node_Node have "avl ?r" by simp 332 from \<open>avl t\<close> and Node_Node have "avl ?l" by simp 333 hence "avl(?l')" by (rule avl_split_max,simp) 334 have l'_height: "height ?l = height ?l' \<or> height ?l = height ?l' + 1" using \<open>avl ?l\<close> by (intro avl_split_max) auto 335 have t_height: "height t = 1 + max (height ?l) (height ?r)" using \<open>avl t\<close> Node_Node by simp 336 have "height t = height ?t' \<or> height t = height ?t' + 1" using \<open>avl t\<close> Node_Node 337 proof(cases "height ?r = height ?l' + 2") 338 case False 339 show ?thesis using l'_height t_height False 340 by (subst height_balR2[OF \<open>avl ?l'\<close> \<open>avl ?r\<close> False])+ arith 341 next 342 case True 343 show ?thesis 344 proof(cases rule: disjE[OF height_balR[OF True \<open>avl ?l'\<close> \<open>avl ?r\<close>, of "snd (split_max ?l)"]]) 345 case 1 thus ?thesis using l'_height t_height True by arith 346 next 347 case 2 thus ?thesis using l'_height t_height True by arith 348 qed 349 qed 350 thus ?thesis using Node_Node by (auto split:prod.splits) 351qed simp_all 352 353text\<open>Deletion maintains the AVL property:\<close> 354 355theorem avl_delete: 356 assumes "avl t" 357 shows "avl(delete x t)" and "height t = (height (delete x t)) \<or> height t = height (delete x t) + 1" 358using assms 359proof (induct t) 360 case (Node l n h r) 361 case 1 362 show ?case 363 proof(cases "x = n") 364 case True with Node 1 show ?thesis by (auto simp:avl_del_root) 365 next 366 case False 367 show ?thesis 368 proof(cases "x<n") 369 case True with Node 1 show ?thesis by (auto simp add:avl_balR) 370 next 371 case False with Node 1 \<open>x\<noteq>n\<close> show ?thesis by (auto simp add:avl_balL) 372 qed 373 qed 374 case 2 375 show ?case 376 proof(cases "x = n") 377 case True 378 with 1 have "height (Node l n h r) = height(del_root (Node l n h r)) 379 \<or> height (Node l n h r) = height(del_root (Node l n h r)) + 1" 380 by (subst height_del_root,simp_all) 381 with True show ?thesis by simp 382 next 383 case False 384 show ?thesis 385 proof(cases "x<n") 386 case True 387 show ?thesis 388 proof(cases "height r = height (delete x l) + 2") 389 case False with Node 1 \<open>x < n\<close> show ?thesis by(auto simp: balR_def) 390 next 391 case True 392 hence "(height (balR (delete x l) n r) = height (delete x l) + 2) \<or> 393 height (balR (delete x l) n r) = height (delete x l) + 3" (is "?A \<or> ?B") 394 using Node 2 by (intro height_balR) auto 395 thus ?thesis 396 proof 397 assume ?A with \<open>x < n\<close> Node 2 show ?thesis by(auto simp: balR_def) 398 next 399 assume ?B with \<open>x < n\<close> Node 2 show ?thesis by(auto simp: balR_def) 400 qed 401 qed 402 next 403 case False 404 show ?thesis 405 proof(cases "height l = height (delete x r) + 2") 406 case False with Node 1 \<open>\<not>x < n\<close> \<open>x \<noteq> n\<close> show ?thesis by(auto simp: balL_def) 407 next 408 case True 409 hence "(height (balL l n (delete x r)) = height (delete x r) + 2) \<or> 410 height (balL l n (delete x r)) = height (delete x r) + 3" (is "?A \<or> ?B") 411 using Node 2 by (intro height_balL) auto 412 thus ?thesis 413 proof 414 assume ?A with \<open>\<not>x < n\<close> \<open>x \<noteq> n\<close> Node 2 show ?thesis by(auto simp: balL_def) 415 next 416 assume ?B with \<open>\<not>x < n\<close> \<open>x \<noteq> n\<close> Node 2 show ?thesis by(auto simp: balL_def) 417 qed 418 qed 419 qed 420 qed 421qed simp_all 422 423 424subsection "Overall correctness" 425 426interpretation S: Set_by_Ordered 427where empty = empty and isin = isin and insert = insert and delete = delete 428and inorder = inorder and inv = avl 429proof (standard, goal_cases) 430 case 1 show ?case by (simp add: empty_def) 431next 432 case 2 thus ?case by(simp add: isin_set_inorder) 433next 434 case 3 thus ?case by(simp add: inorder_insert) 435next 436 case 4 thus ?case by(simp add: inorder_delete) 437next 438 case 5 thus ?case by (simp add: empty_def) 439next 440 case 6 thus ?case by (simp add: avl_insert(1)) 441next 442 case 7 thus ?case by (simp add: avl_delete(1)) 443qed 444 445 446subsection \<open>Height-Size Relation\<close> 447 448text \<open>Based on theorems by Daniel St\"uwe, Manuel Eberl and Peter Lammich.\<close> 449 450lemma height_invers: 451 "(height t = 0) = (t = Leaf)" 452 "avl t \<Longrightarrow> (height t = Suc h) = (\<exists> l a r . t = Node l a (Suc h) r)" 453by (induction t) auto 454 455text \<open>Any AVL tree of height \<open>h\<close> has at least \<open>fib (h+2)\<close> leaves:\<close> 456 457lemma avl_fib_bound: "avl t \<Longrightarrow> height t = h \<Longrightarrow> fib (h+2) \<le> size1 t" 458proof (induction h arbitrary: t rule: fib.induct) 459 case 1 thus ?case by (simp add: height_invers) 460next 461 case 2 thus ?case by (cases t) (auto simp: height_invers) 462next 463 case (3 h) 464 from "3.prems" obtain l a r where 465 [simp]: "t = Node l a (Suc(Suc h)) r" "avl l" "avl r" 466 and C: " 467 height r = Suc h \<and> height l = Suc h 468 \<or> height r = Suc h \<and> height l = h 469 \<or> height r = h \<and> height l = Suc h" (is "?C1 \<or> ?C2 \<or> ?C3") 470 by (cases t) (simp, fastforce) 471 { 472 assume ?C1 473 with "3.IH"(1) 474 have "fib (h + 3) \<le> size1 l" "fib (h + 3) \<le> size1 r" 475 by (simp_all add: eval_nat_numeral) 476 hence ?case by (auto simp: eval_nat_numeral) 477 } moreover { 478 assume ?C2 479 hence ?case using "3.IH"(1)[of r] "3.IH"(2)[of l] by auto 480 } moreover { 481 assume ?C3 482 hence ?case using "3.IH"(1)[of l] "3.IH"(2)[of r] by auto 483 } ultimately show ?case using C by blast 484qed 485 486lemma fib_alt_induct [consumes 1, case_names 1 2 rec]: 487 assumes "n > 0" "P 1" "P 2" "\<And>n. n > 0 \<Longrightarrow> P n \<Longrightarrow> P (Suc n) \<Longrightarrow> P (Suc (Suc n))" 488 shows "P n" 489 using assms(1) 490proof (induction n rule: fib.induct) 491 case (3 n) 492 thus ?case using assms by (cases n) (auto simp: eval_nat_numeral) 493qed (insert assms, auto) 494 495text \<open>An exponential lower bound for @{const fib}:\<close> 496 497lemma fib_lowerbound: 498 defines "\<phi> \<equiv> (1 + sqrt 5) / 2" 499 defines "c \<equiv> 1 / \<phi> ^ 2" 500 assumes "n > 0" 501 shows "real (fib n) \<ge> c * \<phi> ^ n" 502proof - 503 have "\<phi> > 1" by (simp add: \<phi>_def) 504 hence "c > 0" by (simp add: c_def) 505 from \<open>n > 0\<close> show ?thesis 506 proof (induction n rule: fib_alt_induct) 507 case (rec n) 508 have "c * \<phi> ^ Suc (Suc n) = \<phi> ^ 2 * (c * \<phi> ^ n)" 509 by (simp add: field_simps power2_eq_square) 510 also have "\<dots> \<le> (\<phi> + 1) * (c * \<phi> ^ n)" 511 by (rule mult_right_mono) (insert \<open>c > 0\<close>, simp_all add: \<phi>_def power2_eq_square field_simps) 512 also have "\<dots> = c * \<phi> ^ Suc n + c * \<phi> ^ n" 513 by (simp add: field_simps) 514 also have "\<dots> \<le> real (fib (Suc n)) + real (fib n)" 515 by (intro add_mono rec.IH) 516 finally show ?case by simp 517 qed (insert \<open>\<phi> > 1\<close>, simp_all add: c_def power2_eq_square eval_nat_numeral) 518qed 519 520text \<open>The size of an AVL tree is (at least) exponential in its height:\<close> 521 522lemma avl_size_lowerbound: 523 defines "\<phi> \<equiv> (1 + sqrt 5) / 2" 524 assumes "avl t" 525 shows "\<phi> ^ (height t) \<le> size1 t" 526proof - 527 have "\<phi> > 0" by(simp add: \<phi>_def add_pos_nonneg) 528 hence "\<phi> ^ height t = (1 / \<phi> ^ 2) * \<phi> ^ (height t + 2)" 529 by(simp add: field_simps power2_eq_square) 530 also have "\<dots> \<le> fib (height t + 2)" 531 using fib_lowerbound[of "height t + 2"] by(simp add: \<phi>_def) 532 also have "\<dots> \<le> size1 t" 533 using avl_fib_bound[of t "height t"] assms by simp 534 finally show ?thesis . 535qed 536 537text \<open>The height of an AVL tree is most @{term "(1/log 2 \<phi>)"} \<open>\<approx> 1.44\<close> times worse 538than @{term "log 2 (size1 t)"}:\<close> 539 540lemma avl_height_upperbound: 541 defines "\<phi> \<equiv> (1 + sqrt 5) / 2" 542 assumes "avl t" 543 shows "height t \<le> (1/log 2 \<phi>) * log 2 (size1 t)" 544proof - 545 have "\<phi> > 0" "\<phi> > 1" by(auto simp: \<phi>_def pos_add_strict) 546 hence "height t = log \<phi> (\<phi> ^ height t)" by(simp add: log_nat_power) 547 also have "\<dots> \<le> log \<phi> (size1 t)" 548 using avl_size_lowerbound[OF assms(2), folded \<phi>_def] \<open>1 < \<phi>\<close> by simp 549 also have "\<dots> = (1/log 2 \<phi>) * log 2 (size1 t)" 550 by(simp add: log_base_change[of 2 \<phi>]) 551 finally show ?thesis . 552qed 553 554end 555