1(*---------------------------------------------------------------------------*) 2(* Set up context *) 3(*---------------------------------------------------------------------------*) 4 5open HolKernel Parse boolLib bossLib numLib; 6open optionTheory pairTheory relationTheory arithmeticTheory 7 pred_setTheory bagTheory containerTheory 8 listTheory rich_listTheory stringTheory sortingTheory mergesortTheory 9 comparisonTheory balanced_mapTheory regexp_mapTheory osetTheory 10 finite_mapTheory vec_mapTheory charsetTheory regexpTheory 11; 12 13 14val _ = numLib.prefer_num(); 15 16fun pat_elim q = Q.PAT_X_ASSUM q (K ALL_TAC); 17 18val byA = BasicProvers.byA; 19infix byA; 20 21val subst_all_tac = SUBST_ALL_TAC; 22val simp_rule = SIMP_RULE; 23 24val SET_EQ_THM = FUN_EQ_THM; 25 26val list_ss = list_ss ++ rewrites [LENGTH_NIL, LENGTH_NIL_SYM]; 27 28val set_ss = list_ss ++ pred_setLib.PRED_SET_ss ++ rewrites [SET_EQ_THM,IN_DEF] 29 30val comparison_distinct = TypeBase.distinct_of ``:ordering`` 31 32(*---------------------------------------------------------------------------*) 33(* Trivial lemmas *) 34(*---------------------------------------------------------------------------*) 35 36Triviality INTER_DELETE : 37 !A a. A INTER (A DELETE a) = A DELETE a 38Proof 39 SET_TAC [] 40QED 41 42Triviality IN_SET_UNION : 43 !x s t. 44 x IN s ==> (s UNION t = s UNION (x INSERT t)) 45Proof 46 SET_TAC [] 47QED 48 49Triviality leq_thm : 50 transitive (<=) /\ total (<=) /\ antisymmetric (<=) 51Proof 52 srw_tac [ARITH_ss] [transitive_def, total_def, antisymmetric_def] 53QED 54 55Theorem MAX_LE_THM[local] : 56 !m n. m <= MAX m n /\ n <= MAX m n 57Proof 58 RW_TAC arith_ss [MAX_DEF] 59QED 60 61Theorem IS_SOME_EXISTS[local] : 62 !x. IS_SOME x = ?y. x = SOME y 63Proof 64 Cases THEN METIS_TAC [optionTheory.IS_SOME_DEF] 65QED 66 67Triviality length_mergesort : 68 !l R. LENGTH (mergesort R l) = LENGTH l 69Proof 70 metis_tac[mergesort_perm,PERM_LENGTH] 71QED 72 73Triviality LENGTH_NIL_SYM = 74 GEN_ALL (CONV_RULE (LHS_CONV SYM_CONV) (SPEC_ALL LENGTH_NIL)); 75 76Theorem string_to_numlist[local] : 77 !s. (!c. c IN set s ==> ORD c IN set ALPHABET) 78 ==> 79 ?nl. (s = MAP CHR nl) /\ 80 (!n. n IN set nl ==> n IN set ALPHABET) 81Proof 82 Induct >> rw [] 83 >> `?nl. (s = MAP CHR nl) /\ !n. MEM n nl ==> MEM n ALPHABET` by metis_tac[] 84 >> `?n. (h = CHR n) /\ n < 256` by metis_tac [CHR_ONTO] 85 >> Q.EXISTS_TAC `n::nl` 86 >> rw [] 87 >> metis_tac [ORD_CHR_RWT,alphabet_size_def] 88QED 89 90Theorem all_ord_string[local] : 91 EVERY (\c. MEM (ORD c) ALPHABET) s 92 <=> 93 EVERY (\c. ORD c < alphabet_size) s 94Proof 95 simp_tac list_ss [mem_alphabet_iff] 96QED 97 98(*---------------------------------------------------------------------------*) 99(* Let's get started *) 100(*---------------------------------------------------------------------------*) 101 102val _ = new_theory "regexp_compiler"; 103 104(*---------------------------------------------------------------------------*) 105(* Output of the compiler is in terms of vectors. *) 106(*---------------------------------------------------------------------------*) 107 108val _ = Hol_datatype `vector = Vector of 'a list`; 109 110Definition fromList_def : fromList l = Vector l 111End 112 113Definition sub_def : sub (Vector l) n = EL n l 114End 115 116Definition length_def : length (Vector l) = LENGTH l 117End 118 119Theorem fromList_Vector = SIMP_RULE list_ss [GSYM FUN_EQ_THM] fromList_def; 120 121(*---------------------------------------------------------------------------*) 122(* Operations on the set of seen states *) 123(*---------------------------------------------------------------------------*) 124 125val _ = Parse.type_abbrev("regexp_set", ``:(regexp,unit)balanced_map``); 126 127Definition insert_regexp_def : 128 insert_regexp r seen = 129 balanced_map$insert regexp_compare r () seen 130End 131 132Definition mem_regexp_def : 133 mem_regexp r seen = 134 balanced_map$member regexp_compare r seen 135End 136 137(*---------------------------------------------------------------------------*) 138(* Transitions out of a state (regexp). *) 139(*---------------------------------------------------------------------------*) 140 141Definition transitions_def : 142 transitions r = MAP (\c. (c,smart_deriv c r)) ALPHABET 143End 144 145(*---------------------------------------------------------------------------*) 146(* Special case for Empty (failure state) *) 147(*---------------------------------------------------------------------------*) 148 149Definition Empty_arcs_def : 150 Empty_arcs = 151 [(0,Empty); (1,Empty); (2,Empty); (3,Empty); 152 (4,Empty); (5,Empty); (6,Empty); (7,Empty); 153 (8,Empty); (9,Empty); (10,Empty); (11,Empty); 154 (12,Empty); (13,Empty); (14,Empty); (15,Empty); 155 (16,Empty); (17,Empty); (18,Empty); (19,Empty); 156 (20,Empty); (21,Empty); (22,Empty); (23,Empty); 157 (24,Empty); (25,Empty); (26,Empty); (27,Empty); 158 (28,Empty); (29,Empty); (30,Empty); (31,Empty); 159 (32,Empty); (33,Empty); (34,Empty); (35,Empty); 160 (36,Empty); (37,Empty); (38,Empty); (39,Empty); 161 (40,Empty); (41,Empty); (42,Empty); (43,Empty); 162 (44,Empty); (45,Empty); (46,Empty); (47,Empty); 163 (48,Empty); (49,Empty); (50,Empty); (51,Empty); 164 (52,Empty); (53,Empty); (54,Empty); (55,Empty); 165 (56,Empty); (57,Empty); (58,Empty); (59,Empty); 166 (60,Empty); (61,Empty); (62,Empty); (63,Empty); 167 (64,Empty); (65,Empty); (66,Empty); (67,Empty); 168 (68,Empty); (69,Empty); (70,Empty); (71,Empty); 169 (72,Empty); (73,Empty); (74,Empty); (75,Empty); 170 (76,Empty); (77,Empty); (78,Empty); (79,Empty); 171 (80,Empty); (81,Empty); (82,Empty); (83,Empty); 172 (84,Empty); (85,Empty); (86,Empty); (87,Empty); 173 (88,Empty); (89,Empty); (90,Empty); (91,Empty); 174 (92,Empty); (93,Empty); (94,Empty); (95,Empty); 175 (96,Empty); (97,Empty); (98,Empty); (99,Empty); 176 (100,Empty); (101,Empty) ; (102,Empty); (103,Empty); 177 (104,Empty); (105,Empty); 178 (106,Empty); (107,Empty); (108,Empty); (109,Empty); 179 (110,Empty); (111,Empty); (112,Empty); (113,Empty); 180 (114,Empty); (115,Empty); (116,Empty); (117,Empty); 181 (118,Empty); (119,Empty); (120,Empty); (121,Empty); 182 (122,Empty); (123,Empty); (124,Empty); (125,Empty); 183 (126,Empty); (127,Empty); (128,Empty); (129,Empty); 184 (130,Empty); (131,Empty); (132,Empty); (133,Empty); 185 (134,Empty); (135,Empty); (136,Empty); (137,Empty); 186 (138,Empty); (139,Empty); (140,Empty); (141,Empty); 187 (142,Empty); (143,Empty); (144,Empty); (145,Empty); 188 (146,Empty); (147,Empty); (148,Empty); (149,Empty); 189 (150,Empty); (151,Empty); (152,Empty); (153,Empty); 190 (154,Empty); (155,Empty); (156,Empty); (157,Empty); 191 (158,Empty); (159,Empty); (160,Empty); (161,Empty); 192 (162,Empty); (163,Empty); (164,Empty); (165,Empty); 193 (166,Empty); (167,Empty); (168,Empty); (169,Empty); 194 (170,Empty); (171,Empty); (172,Empty); (173,Empty); 195 (174,Empty); (175,Empty); (176,Empty); (177,Empty); 196 (178,Empty); (179,Empty); (180,Empty); (181,Empty); 197 (182,Empty); (183,Empty); (184,Empty); (185,Empty); 198 (186,Empty); (187,Empty); (188,Empty); (189,Empty); 199 (190,Empty); (191,Empty); (192,Empty); (193,Empty); 200 (194,Empty); (195,Empty); (196,Empty); (197,Empty); 201 (198,Empty); (199,Empty); (200,Empty); (201,Empty); 202 (202,Empty); (203,Empty); (204,Empty); (205,Empty); 203 (206,Empty); (207,Empty); (208,Empty); (209,Empty); 204 (210,Empty); (211,Empty); (212,Empty); (213,Empty); 205 (214,Empty); (215,Empty); (216,Empty); (217,Empty); 206 (218,Empty); (219,Empty); (220,Empty); (221,Empty); 207 (222,Empty); (223,Empty); (224,Empty); (225,Empty); 208 (226,Empty); (227,Empty); (228,Empty); (229,Empty); 209 (230,Empty); (231,Empty); (232,Empty); (233,Empty); 210 (234,Empty); (235,Empty); (236,Empty); (237,Empty); 211 (238,Empty); (239,Empty); (240,Empty); (241,Empty); 212 (242,Empty); (243,Empty); (244,Empty); (245,Empty); 213 (246,Empty); (247,Empty); (248,Empty); (249,Empty); 214 (250,Empty); (251,Empty); (252,Empty); (253,Empty); 215 (254,Empty); (255,Empty)] 216End; 217 218Theorem Empty_arcs_thm : 219 MAP (\c. (c,smart_deriv c Empty)) ALPHABET = Empty_arcs 220Proof 221 rw_tac (srw_ss()) 222 [ALPHABET_def, Empty_arcs_def, smart_deriv_def, 223 Empty_def,Epsilon_def,charset_mem_empty] 224QED 225 226Theorem transitions_thm : 227 transitions r = if r = Empty then Empty_arcs 228 else MAP (\c. (c,smart_deriv c r)) ALPHABET 229Proof 230 rw_tac list_ss [transitions_def] >> metis_tac [Empty_arcs_thm] 231QED 232 233(*---------------------------------------------------------------------------*) 234(* Given the outgoing arcs (c_1,r_1) ... (c_n,r_n) from node r, add all the *) 235(* not-yet-seen r_i regexps to the state map and return the row as a list of *) 236(* (c_i, state_map(r_i)) pairs *) 237(*---------------------------------------------------------------------------*) 238 239Definition extend_states_def : 240 (extend_states next_state state_map row [] = (next_state,state_map,row)) /\ 241 (extend_states next_state state_map row ((c,r')::t) = 242 case lookup regexp_compare r' state_map 243 of SOME n => extend_states next_state state_map ((c,n)::row) t 244 | NONE => extend_states 245 (next_state + 1n) 246 (insert regexp_compare r' next_state state_map) 247 ((c,next_state)::row) 248 t) 249End 250 251(*---------------------------------------------------------------------------*) 252(* Compute the row corresponding to state r and cons it on to the table *) 253(* under construction. Also keep the state_map uptodate. *) 254(*---------------------------------------------------------------------------*) 255 256Definition build_table_def : 257 build_table arcs r (next_state,state_map,table) = 258 let (next_state',state_map',row) = 259 extend_states next_state state_map [] arcs 260 in case lookup regexp_compare r state_map' 261 of SOME n => (next_state', state_map', (n,row)::table) 262 | NONE => (next_state' + 1n, 263 insert regexp_compare r next_state' state_map', 264 (next_state',row)::table) 265End 266 267(*---------------------------------------------------------------------------*) 268(* The core regexp compiler. The seen argument holds the already-seen *) 269(* regexps, which map to states in the final DFA. The n argument is a *) 270(* step-index used to ensure that the function terminates. *) 271(*---------------------------------------------------------------------------*) 272 273Definition Brz_def : 274 Brz seen work acc n = 275 if n <= 0n then 276 NONE 277 else 278 if null work then 279 SOME (seen,acc) 280 else 281 let (m,t) = deleteFindMin work; 282 r = FST m; 283 in if mem_regexp r seen then 284 Brz seen t acc (n-1) 285 else 286 let arcs = transitions r 287 in Brz (insert_regexp r seen) 288 (FOLDL (\work a. insert_regexp (SND a) work) t arcs) 289 (build_table arcs r acc) 290 (n-1) 291End 292 293Definition MAXNUM_32_def : 294 MAXNUM_32 = 2147483647n 295End 296 297(*---------------------------------------------------------------------------*) 298(* Build Greve-style Brz function *) 299(*---------------------------------------------------------------------------*) 300 301(*---------------------------------------------------------------------------*) 302(* Domain of the function. *) 303(*---------------------------------------------------------------------------*) 304 305Definition dom_Brz_def : 306 dom_Brz seen work acc = ?d. IS_SOME(Brz seen work acc d) 307End 308 309(*---------------------------------------------------------------------------*) 310(* Create measure function rdepth. Uses following code copied from *) 311(* *) 312(* HOLDIR/src/pfl/defchoose *) 313(* *) 314(*---------------------------------------------------------------------------*) 315 316(*---------------------------------------------------------------------------*) 317(* MINCHOOSE (sname, cname,``!x1 ... xn. ?z. M[x1,...,xn,z]``) *) 318(* returns *) 319(* *) 320(* |- !x1...xn z. M[x1,...,xn,z] ==> *) 321(* M[x1,...,xn,cname x1...xn] /\ *) 322(* !m. M[x1,...,xn,m] ==> cname x1...xn <= m *) 323(* *) 324(* where cname in the theorem is a constant. This theorem is stored in the *) 325(* current theory under sname. Thus this rule is a form of the *) 326(* well-ordering property. *) 327(*---------------------------------------------------------------------------*) 328 329Theorem WOP_ALT[local] : 330 !P. (?n. P n) ==> ?min. P min /\ !k. P k ==> min <= k 331Proof 332 METIS_TAC [arithmeticTheory.WOP,DECIDE ``~(m<n) ==> n<=m``] 333QED 334 335fun MINCHOOSE (store_name,const_name,tm) = 336 let val (V,body) = strip_forall tm 337 val P = snd(dest_comb body) 338 val wop_thm = BETA_RULE(SPEC P WOP_ALT) 339 val min_term = snd(dest_imp (concl wop_thm)) 340 val min_term_pred = snd(dest_comb min_term) 341 val th1 = BETA_RULE(GSYM (ISPECL [body,min_term_pred] RIGHT_EXISTS_IMP_THM)) 342 val th2 = EQ_MP th1 wop_thm 343 val th3 = GENL V th2 344 val th4 = Ho_Rewrite.REWRITE_RULE[SKOLEM_THM] th3 345 val th5 = Ho_Rewrite.REWRITE_RULE[GSYM LEFT_FORALL_IMP_THM] th4 346 in 347 new_specification(store_name, [const_name], th5) 348 end 349 handle e => raise wrap_exn "" "MINCHOOSE" e; 350 351val rdepth_thm = 352 MINCHOOSE ("rdepth_thm", "rdepth", 353 ``!seen work acc. ?d. IS_SOME(Brz seen work acc d)``) 354 355(*---------------------------------------------------------------------------*) 356(* Define Brzozo *) 357(*---------------------------------------------------------------------------*) 358 359Definition Brzozo_def : 360 Brzozo seen work acc = 361 THE (Brz seen work acc (rdepth seen work acc)) 362End 363 364 365(*---------------------------------------------------------------------------*) 366(* Lemmas about Brz and definedness *) 367(*---------------------------------------------------------------------------*) 368 369Theorem IS_SOME_Brz[local] : 370 !d seen work acc. 371 IS_SOME (Brz seen work acc d) ==> d <> 0 372Proof 373 Cases >> rw[Once Brz_def] 374QED 375 376Theorem Brz_SOME[local] : 377 !d seen work acc res. 378 (Brz seen work acc d = SOME res) ==> d <> 0 379Proof 380 METIS_TAC [IS_SOME_Brz,IS_SOME_EXISTS] 381QED 382 383Theorem Brz_dlem[local] : 384 !d seen work acc. 385 IS_SOME (Brz seen work acc d) 386 ==> 387 (Brz seen work acc d = Brz seen work acc (SUC d)) 388Proof 389 Ho_Rewrite.REWRITE_TAC [IS_SOME_EXISTS,GSYM LEFT_FORALL_IMP_THM] 390 >> Induct 391 >- metis_tac [Brz_SOME] 392 >- (rw_tac bool_ss [] 393 >> pop_assum (mp_tac o SYM) 394 >> ONCE_REWRITE_TAC [Brz_def] 395 >> rw_tac list_ss [LET_THM] 396 >> Cases_on `(deleteFindMin work)` 397 >> BasicProvers.NORM_TAC list_ss [] 398 >> fs[]) 399QED 400 401Theorem Brz_monotone[local] : 402 !d1 d2 seen work acc. 403 IS_SOME(Brz seen work acc d1) /\ d1 <= d2 404 ==> (Brz seen work acc d1 = Brz seen work acc d2) 405Proof 406 RW_TAC arith_ss [LESS_EQ_EXISTS] THEN 407 Induct_on `p` THEN METIS_TAC [ADD_CLAUSES,Brz_dlem] 408QED 409 410Theorem Brz_norm[local] : 411 !d seen work acc. 412 IS_SOME(Brz seen work acc d) 413 ==> 414 (Brz seen work acc d = Brz seen work acc (rdepth seen work acc)) 415Proof 416 METIS_TAC [Brz_monotone,rdepth_thm] 417QED 418 419Theorem Brz_determ : 420 !d1 d2 seen work acc. 421 IS_SOME(Brz seen work acc d1) /\ IS_SOME(Brz seen work acc d2) 422 ==> (Brz seen work acc d1 = Brz seen work acc d2) 423Proof 424 METIS_TAC [Brz_norm] 425QED 426 427(*---------------------------------------------------------------------------*) 428(* Derive eqns for dom_Brz *) 429(*---------------------------------------------------------------------------*) 430 431Theorem lem[local] : 432 !seen work acc. 433 null work ==> IS_SOME (Brz seen work acc 1) 434Proof 435 RW_TAC arith_ss [Once Brz_def] 436QED 437 438Theorem dom_base_case[local] : 439 !seen work acc. 440 null work ==> dom_Brz seen work acc 441Proof 442 METIS_TAC [dom_Brz_def, lem] 443QED 444 445Theorem step2_lem1a[local] : 446 !seen work acc r x t. 447 ~null work /\ 448 (deleteFindMin work = ((r,x),t)) /\ 449 mem_regexp r seen /\ dom_Brz seen work acc 450 ==> dom_Brz seen t acc 451Proof 452 rw_tac std_ss [dom_Brz_def] 453 >> `d<>0` by metis_tac [IS_SOME_Brz] 454 >> qexists_tac `d-1` 455 >> qpat_x_assum `IS_SOME arg` (mp_tac o ONCE_REWRITE_RULE [Brz_def]) 456 >> rw_tac arith_ss [LET_THM] 457QED 458 459Theorem step2_lem1b[local] : 460 !seen work acc r x t arcs. 461 ~null work /\ 462 (deleteFindMin work = ((r,x),t)) /\ 463 ~mem_regexp r seen /\ 464 (arcs = transitions r) /\ 465 dom_Brz seen work acc 466 ==> dom_Brz (insert_regexp r seen) 467 (FOLDL (\work a. insert_regexp (SND a) work) t arcs) 468 (build_table arcs r acc) 469Proof 470 rw_tac std_ss [dom_Brz_def] 471 >> `d<>0` by metis_tac [IS_SOME_Brz] 472 >> qexists_tac `d-1` 473 >> qpat_x_assum `IS_SOME arg` (mp_tac o ONCE_REWRITE_RULE [Brz_def]) 474 >> rw_tac arith_ss [LET_THM] 475QED 476 477Theorem step2_lem2a[local] : 478 !seen work acc r x t. 479 ~null work /\ 480 (deleteFindMin work = ((r,x),t)) /\ 481 mem_regexp r seen /\ dom_Brz seen t acc 482 ==> dom_Brz seen work acc 483Proof 484 rw_tac std_ss [dom_Brz_def] 485 >> Q.EXISTS_TAC `SUC d` 486 >> rw [Once Brz_def] 487QED 488 489Theorem step2_lem2b[local] : 490 !seen work acc r x t arcs. 491 ~null work /\ 492 (deleteFindMin work = ((r,x),t)) /\ 493 (arcs = transitions r) /\ 494 ~mem_regexp r seen /\ 495 dom_Brz (insert_regexp r seen) 496 (FOLDL (\work a. insert_regexp (SND a) work) t arcs) 497 (build_table arcs r acc) 498 ==> dom_Brz seen work acc 499Proof 500 rw_tac std_ss [dom_Brz_def] 501 >> Q.EXISTS_TAC `SUC d` 502 >> rw [Once Brz_def,LET_THM] 503QED 504 505 506(*---------------------------------------------------------------------------*) 507(* Equational characterization of dom. *) 508(*---------------------------------------------------------------------------*) 509 510Theorem dom_Brz_eqns : 511 dom_Brz seen work acc = 512 if null work then 513 T 514 else let (m,t) = deleteFindMin work ; 515 r = FST m; 516 in 517 if mem_regexp r seen then 518 dom_Brz seen t acc 519 else let arcs = transitions r 520 in dom_Brz (insert_regexp r seen) 521 (FOLDL (\work a. insert_regexp (SND a) work) t arcs) 522 (build_table arcs r acc) 523Proof 524 Cases_on `null work` 525 >- metis_tac[dom_base_case] 526 >- (asm_simp_tac list_ss [] 527 >> Cases_on `deleteFindMin work` 528 >> rw [] 529 >> Cases_on `q` 530 >> fs[] 531 >- metis_tac [step2_lem1a,step2_lem2a] 532 >- metis_tac [step2_lem1b,step2_lem2b]) 533QED 534 535(*---------------------------------------------------------------------------*) 536(* Optimization: dom_Brz doesn't depend on its "acc" argument, so can be *) 537(* replaced by a version that doesn't have it. *) 538(*---------------------------------------------------------------------------*) 539 540Definition dom_Brz_alt_def : 541 dom_Brz_alt seen work = dom_Brz seen work ARB 542End 543 544Theorem acc_irrelevant[local] : 545 !n seen work acc acc1. 546 IS_SOME (Brz seen work acc n) ==> IS_SOME (Brz seen work acc1 n) 547Proof 548 Induct 549 >- rw [Once Brz_def] 550 >- (ONCE_REWRITE_TAC [Brz_def] 551 >> rw_tac list_ss [LET_THM] 552 >> Cases_on `(deleteFindMin work)` 553 >> fs [] 554 >> metis_tac[]) 555QED 556 557Theorem dom_Brz_alt_equal : 558 !seen work acc. 559 dom_Brz seen work acc = dom_Brz_alt seen work 560Proof 561 rw [dom_Brz_def, dom_Brz_alt_def] >> metis_tac [acc_irrelevant] 562QED 563 564Theorem dom_Brz_alt_eqns : 565 dom_Brz_alt seen work = 566 if null work then 567 T 568 else let (m,t) = deleteFindMin work ; 569 r = FST m; 570 in 571 if mem_regexp r seen then 572 dom_Brz_alt seen t 573 else let arcs = transitions r 574 in dom_Brz_alt (insert_regexp r seen) 575 (FOLDL (\work a. insert_regexp (SND a) work) t arcs) 576Proof 577 metis_tac [SIMP_RULE bool_ss [dom_Brz_alt_equal] dom_Brz_eqns] 578QED 579 580(*---------------------------------------------------------------------------*) 581(* Recursion equations for Brzozo *) 582(*---------------------------------------------------------------------------*) 583 584Theorem Brzozo_base[local] : 585 !seen work acc. 586 dom_Brz seen work acc /\ null work 587 ==> 588 (Brzozo seen work acc = (seen,acc)) 589Proof 590 rw_tac std_ss [Brzozo_def,dom_Brz_def] 591 >> `rdepth seen work acc <> 0` 592 by METIS_TAC [IS_SOME_Brz,rdepth_thm] 593 >> rw_tac arith_ss [Once Brz_def] 594QED 595 596Theorem Brzozo_rec1[local] : 597 !seen work accr r x t. 598 ~null work /\ 599 (deleteFindMin work = ((r,x),t)) /\ 600 dom_Brz seen work acc /\ 601 mem_regexp r seen 602 ==> 603 (Brzozo seen work acc = Brzozo seen t acc) 604Proof 605 rw_tac std_ss [Brzozo_def,dom_Brz_def] 606 >> `d <> 0` by METIS_TAC [IS_SOME_Brz] 607 >> `Brz seen work acc d = Brz seen t acc (d-1)` 608 by rw_tac list_ss [SimpLHS,Once Brz_def,LET_THM] 609 >> metis_tac [IS_SOME_EXISTS,NOT_SOME_NONE,THE_DEF,Brz_norm] 610QED 611 612Theorem Brzozo_rec2[local] : 613 !seen work accr r x t. 614 ~null work /\ 615 (deleteFindMin work = ((r,x),t)) /\ 616 dom_Brz seen work acc /\ 617 ~mem_regexp r seen 618 ==> 619 (Brzozo seen work acc = 620 let arcs = transitions r 621 in Brzozo (insert_regexp r seen) 622 (FOLDL (\work a. insert_regexp (SND a) work) t arcs) 623 (build_table arcs r acc)) 624Proof 625 rw_tac std_ss [Brzozo_def,dom_Brz_def,LET_THM] 626 >> `d <> 0` by METIS_TAC [IS_SOME_Brz] 627 >> `Brz seen work acc d = 628 let arcs = transitions r 629 in Brz (insert_regexp r seen) 630 (FOLDL (\work a. insert_regexp (SND a) work) t arcs) 631 (build_table arcs r acc) (d-1)` 632 by rw_tac list_ss [SimpLHS,Once Brz_def,LET_THM] 633 >> METIS_TAC [IS_SOME_EXISTS,NOT_SOME_NONE,THE_DEF,Brz_norm,LET_THM] 634QED 635 636(*---------------------------------------------------------------------------*) 637(* Equational characterization of Brzozo. *) 638(*---------------------------------------------------------------------------*) 639 640Theorem Brzozo_eqns[local] : 641 !seen work acc. 642 dom_Brz seen work acc 643 ==> 644 Brzozo seen work acc = 645 if null work then 646 (seen,acc) 647 else 648 let (rp,t) = deleteFindMin work; 649 r = FST rp; 650 in 651 if mem_regexp r seen then 652 Brzozo seen t acc 653 else 654 let arcs = transitions r 655 in 656 Brzozo (insert_regexp r seen) 657 (FOLDL (\work a. insert_regexp (SND a) work) t arcs) 658 (build_table arcs r acc) 659Proof 660 rw_tac list_ss [LET_THM] 661 >- metis_tac [Brzozo_base] 662 >- (Cases_on `deleteFindMin work` 663 >> Cases_on `q` 664 >> fs[] 665 >> metis_tac [Brzozo_rec1,Brzozo_rec2]) 666QED 667 668(*---------------------------------------------------------------------------*) 669(* Now prove induction theorem. This is based on using rdepth as a measure *) 670(* on the recursion. Thus we first have some lemmas about how rdepth *) 671(* decreases in recursive calls. *) 672(*---------------------------------------------------------------------------*) 673 674Theorem step3a_lt[local] : 675 !seen work acc r x t. 676 ~null work /\ 677 (deleteFindMin work = ((r,x),t)) /\ 678 mem_regexp r seen /\ 679 dom_Brz seen work acc 680 ==> rdepth seen t acc < rdepth seen work acc 681Proof 682 rw_tac std_ss [dom_Brz_def] THEN IMP_RES_TAC rdepth_thm THEN 683 `rdepth seen work acc <> 0` by METIS_TAC [IS_SOME_Brz] THEN 684 `rdepth seen work acc - 1 < rdepth seen work acc` by DECIDE_TAC THEN 685 `IS_SOME (Brz seen t acc (rdepth seen work acc - 1))` 686 by (qpat_x_assum `IS_SOME (Brz seen work acc (rdepth seen work acc))` 687 (mp_tac o SIMP_RULE arith_ss [Once Brz_def]) 688 >> rw[LET_THM]) 689 >> `rdepth seen t acc <= rdepth seen work acc - 1` 690 by metis_tac [rdepth_thm] 691 >> decide_tac 692QED 693 694Theorem step3b_lt[local] : 695 !seen work acc r x t arcs. 696 ~null work /\ 697 (deleteFindMin work = ((r,x),t)) /\ 698 ~mem_regexp r seen /\ 699 (arcs = transitions r) /\ dom_Brz seen work acc 700 ==> rdepth (insert_regexp r seen) 701 (FOLDL (\work a. insert_regexp (SND a) work) t arcs) 702 (build_table arcs r acc) 703 < 704 rdepth seen work acc 705Proof 706 rw_tac std_ss [dom_Brz_def] THEN IMP_RES_TAC rdepth_thm 707 >> `rdepth seen work acc <> 0` by METIS_TAC [IS_SOME_Brz] 708 >> `rdepth seen work acc - 1 < rdepth seen work acc` by DECIDE_TAC 709 >> `IS_SOME (Brz (insert_regexp r seen) 710 (FOLDL (\work a. insert_regexp (SND a) work) t (transitions r)) 711 (build_table (transitions r) r acc) 712 (rdepth seen work acc - 1))` 713 by (qpat_x_assum `IS_SOME (Brz seen work acc (rdepth seen work acc))` 714 (mp_tac o SIMP_RULE arith_ss [Once Brz_def]) 715 >> rw[LET_THM]) 716 >> `rdepth (insert_regexp r seen) 717 (FOLDL (\work a. insert_regexp (SND a) work) t (transitions r)) 718 (build_table (transitions r) r acc) <= rdepth seen work acc - 1` 719 by metis_tac [rdepth_thm] 720 >> decide_tac 721QED 722 723(*---------------------------------------------------------------------------*) 724(* Induction for Brzozo is obtained by instantiating the WF induction *) 725(* theorem with the rdepth measure and then simplifying. *) 726(*---------------------------------------------------------------------------*) 727 728Theorem ind_lemA = 729 prim_recTheory.WF_measure 730 |> Q.ISPEC `\(seen,work,acc). rdepth seen work acc` 731 |> MATCH_MP relationTheory.WF_INDUCTION_THM 732 |> simp_rule std_ss [prim_recTheory.measure_thm] 733 |> Q.ISPEC `\(seen,work,acc). 734 dom_Brz seen work acc ==> P seen work acc` 735 |> simp_rule std_ss [pairTheory.FORALL_PROD] 736; 737 738Theorem ind_lemB[local] : 739 !P. (!seen work acc. 740 (!a b c. rdepth a b c < rdepth seen work acc 741 ==> dom_Brz a b c 742 ==> P a b c) 743 ==> dom_Brz seen work acc ==> P seen work acc) 744 ==> 745 !seen work acc. dom_Brz seen work acc ==> P seen work acc 746Proof 747rpt strip_tac 748 >> pop_assum mp_tac 749 >> `?a b c. acc = (a,b,c)` by metis_tac [ABS_PAIR_THM] 750 >> pop_assum SUBST_ALL_TAC 751 >> MAP_EVERY Q.ID_SPEC_TAC [`a`,`b`, `c`,`work`,`seen`] 752 >> match_mp_tac ind_lemA 753 >> fs [FORALL_PROD] 754QED 755 756Theorem Brzozowski_ind : 757 !P. 758 (!seen work acc. 759 dom_Brz seen work acc /\ 760 (!r x t. ~null work /\ 761 (deleteFindMin work = ((r,x),t)) /\ 762 mem_regexp r seen ==> P seen t acc) /\ 763 (!r x t arcs. 764 ~null work /\ (deleteFindMin work = ((r,x),t)) /\ 765 (arcs = transitions r) /\ ~mem_regexp r seen 766 ==> P (insert_regexp r seen) 767 (FOLDL (\work a. insert_regexp (SND a) work) t arcs) 768 (build_table arcs r acc)) 769 ==> P seen work acc) 770 ==> !seen work acc. dom_Brz seen work acc ==> P seen work acc 771Proof 772 gen_tac 773 >> strip_tac 774 >> ho_match_mp_tac ind_lemB 775 >> simp_tac bool_ss [AND_IMP_INTRO] 776 >> rpt gen_tac 777 >> disch_then (fn th => first_x_assum match_mp_tac >> assume_tac th) 778 >> rw [] 779 >> (first_x_assum match_mp_tac 780 >> asm_simp_tac list_ss [step3a_lt,step3b_lt] 781 >> mp_tac dom_Brz_eqns 782 >> rw []) 783QED 784 785(*---------------------------------------------------------------------------*) 786(* Efficient executable version of Brzozo *) 787(*---------------------------------------------------------------------------*) 788 789Definition exec_Brz_def : 790 exec_Brz seen work acc d = 791 if d=0n then 792 (if dom_Brz seen work acc then Brzozo seen work acc else ARB) 793 else 794 if null work then 795 (seen,acc) 796 else 797 let (m,t) = deleteFindMin work ; 798 r = FST m; 799 in 800 if mem_regexp r seen then 801 exec_Brz seen t acc (d - 1) 802 else 803 let arcs = transitions r 804 in exec_Brz (insert_regexp r seen) 805 (FOLDL (\work a. insert_regexp (SND a) work) t arcs) 806 (build_table arcs r acc) (d - 1) 807End 808 809Theorem exec_Brz_equals_Brzozo[local] : 810 !d seen work acc. 811 dom_Brz seen work acc 812 ==> 813 (exec_Brz seen work acc d = Brzozo seen work acc) 814Proof 815 Induct 816 >> rw_tac std_ss [Once exec_Brz_def,LET_THM] 817 >- metis_tac [Brzozo_eqns] 818 >- (`?r x t. deleteFindMin work = ((r,x),t)` by metis_tac [ABS_PAIR_THM] 819 >> rw [Brzozo_eqns] 820 >> (first_x_assum match_mp_tac 821 >> qpat_x_assum `dom_Brz seen work acc` 822 (mp_tac o SIMP_RULE list_ss [Once dom_Brz_eqns]) 823 >> rw [])) 824QED 825 826(*---------------------------------------------------------------------------*) 827(* The final definition of the core functionality of the compiler *) 828(*---------------------------------------------------------------------------*) 829 830Definition Brzozowski_def : 831 Brzozowski seen work acc = 832 if dom_Brz seen work acc then 833 Brzozo seen work acc 834 else 835 exec_Brz seen work acc MAXNUM_32 836End 837 838(*---------------------------------------------------------------------------*) 839(* Theorem showing that Brzozowski can be executed w.o. termination proof. *) 840(* In order to evaluate Brzozowski, we can dispatch to exec_Brz. *) 841(*---------------------------------------------------------------------------*) 842 843Theorem Brzozowski_exec_Brz : 844 Brzozowski seen work acc = exec_Brz seen work acc MAXNUM_32 845Proof 846 rw_tac std_ss [Brzozowski_def,exec_Brz_equals_Brzozo] 847QED 848 849(*---------------------------------------------------------------------------*) 850(* In order to reason about Brzozowski, we use the following theorem. *) 851(*---------------------------------------------------------------------------*) 852 853Theorem Brzozowski_eqns : 854 dom_Brz seen work acc 855 ==> 856 Brzozowski seen work acc = 857 if null work then 858 (seen,acc) 859 else let (m,t) = deleteFindMin work; 860 r = FST m; 861 in if mem_regexp r seen then 862 Brzozowski seen t acc 863 else 864 let arcs = transitions r 865 in Brzozowski (insert_regexp r seen) 866 (FOLDL (\work a. insert_regexp (SND a) work) t arcs) 867 (build_table arcs r acc) 868Proof 869rw_tac std_ss [Brzozowski_def,LET_THM] 870 >- metis_tac [Brzozo_eqns] 871 >- (`?r x t. deleteFindMin work = ((r,x),t)` by metis_tac [ABS_PAIR_THM] 872 >> rw [] 873 >> rw [SimpLHS, Once Brzozo_eqns] 874 >> (qpat_x_assum `dom_Brz seen work acc` 875 (mp_tac o SIMP_RULE list_ss [Once dom_Brz_eqns]) 876 >> rw [])) 877QED 878 879(*---------------------------------------------------------------------------*) 880(* Definitions that find the accept states and map the final results to *) 881(* vector form. *) 882(*---------------------------------------------------------------------------*) 883 884Definition get_accepts_def : 885 get_accepts fmap = 886 mergesort (\a b:num. a <= b) 887 (balanced_map$foldrWithKey 888 (\r (n:num) nlist. if nullable r then n::nlist else nlist) 889 [] fmap) 890End 891 892Definition accepts_to_vector_def: 893 accepts_to_vector accept_states max_state = 894 alist_to_vec (MAP (\x. (x,T)) accept_states) F max_state max_state 895End 896 897Definition table_to_vectors_def: 898 table_to_vectors table = 899 MAP (\(k:num,table2). 900 alist_to_vec (mergesort (inv_image (\a b:num. a <= b) FST) 901 (MAP (\(c,n). (c, SOME n)) table2)) 902 NONE alphabet_size alphabet_size) 903 (mergesort (inv_image (\a b:num. a <= b) FST) table) 904End 905 906(*---------------------------------------------------------------------------*) 907(* The basic regexp compiler function *) 908(*---------------------------------------------------------------------------*) 909 910Definition compile_regexp_def : 911 compile_regexp r = 912 let r' = normalize r ; 913 (states,last_state,state_numbering,table) 914 = Brzozowski balanced_map$empty 915 (balanced_map$singleton r' ()) 916 (1n, balanced_map$singleton r' 0n, []) ; 917 delta_vecs = table_to_vectors table ; 918 accepts_vec = accepts_to_vector (get_accepts state_numbering) last_state 919 in 920 (state_numbering, delta_vecs, accepts_vec) 921End 922 923(*---------------------------------------------------------------------------*) 924(* DFA evaluator *) 925(*---------------------------------------------------------------------------*) 926 927Definition exec_dfa_def : 928 exec_dfa finals table n s = 929 case s of 930 | "" => sub finals n 931 | c::t => 932 case sub (sub table n) (ORD c) of 933 | NONE => F 934 | SOME k => exec_dfa finals table k t 935End 936 937Theorem exec_dfa_thm : 938 (exec_dfa finals table n [] = sub finals n) /\ 939 (exec_dfa finals table n (c::t) = 940 case sub (sub table n) (ORD c) of 941 | NONE => F 942 | SOME k => exec_dfa finals table k t) 943Proof 944 CONJ_TAC 945 >- rw_tac list_ss [exec_dfa_def] 946 >- rw_tac list_ss [SimpLHS, Once exec_dfa_def] 947QED 948 949(*---------------------------------------------------------------------------*) 950(* Set of strings accepted by DFA generated from regexp r *) 951(*---------------------------------------------------------------------------*) 952 953Definition Brz_lang_def : 954 Brz_lang r = 955 let (state_map,table_vec,finals_vec) = compile_regexp r ; 956 tableV = fromList (MAP fromList table_vec) ; 957 finalsV = fromList finals_vec ; 958 in 959 exec_dfa finalsV tableV 960 (THE (lookup regexp_compare (normalize r) state_map)) 961End 962 963 964(*---------------------------------------------------------------------------*) 965(* Combines the DFA compilation and the DFA evaluator. Returns a function of *) 966(* type :string -> bool *) 967(*---------------------------------------------------------------------------*) 968 969Definition regexp_matcher_def : 970 regexp_matcher r = 971 let (state_map,delta,accepts) = compile_regexp r ; 972 start_state = THE (lookup regexp_compare (normalize r) state_map) ; 973 acceptsV = fromList accepts ; 974 deltaV = fromList (MAP fromList delta) ; 975 in 976 exec_dfa acceptsV deltaV start_state 977End 978 979(*---------------------------------------------------------------------------*) 980(* Properties of get_accepts *) 981(*---------------------------------------------------------------------------*) 982 983Theorem sorted_get_accepts : 984 !fmap. 985 SORTED $<= (get_accepts fmap) 986Proof 987 RW_TAC set_ss [get_accepts_def] THEN 988 CONV_TAC (DEPTH_CONV ETA_CONV) THEN 989 MATCH_MP_TAC mergesort_sorted THEN 990 RW_TAC arith_ss [transitive_def, total_def] 991QED 992 993 994Theorem mem_get_acceptsA[local] : 995 !fmap P x acc. 996 balanced_map$invariant regexp_compare fmap /\ 997 MEM x (foldrWithKey 998 (\r n nlist. if P r then n::nlist else nlist) 999 acc fmap) 1000 ==> 1001 MEM x acc \/ ?r. P r /\ (lookup regexp_compare r fmap = SOME x) 1002Proof 1003 Induct THEN RW_TAC std_ss [] 1004 >- full_simp_tac list_ss [foldrWithKey_def,lookup_def] 1005 >- (simp_tac list_ss [lookup_bin] 1006 >> 1007 `invariant regexp_compare fmap /\ 1008 invariant regexp_compare fmap' /\ 1009 key_ordered regexp_compare k fmap Greater /\ 1010 key_ordered regexp_compare k fmap' Less` 1011 by metis_tac [invariant_def] 1012 >> qpat_x_assum `invariant cmp (Bin n k v fmap fmap')` (K ALL_TAC) 1013 >> fs [] 1014 >> RULE_ASSUM_TAC (REWRITE_RULE [foldrWithKey_def]) 1015 >> RES_THEN MP_TAC >> BETA_TAC >> rw[] 1016 >- metis_tac [regexp_compare_eq] 1017 >- metis_tac [key_less_lookup,regexp_compare_good,good_cmp_thm] 1018 >- metis_tac [key_greater_lookup,regexp_compare_good,good_cmp_thm] 1019 >- metis_tac [key_less_lookup,regexp_compare_good,good_cmp_thm] 1020 >- metis_tac [key_greater_lookup,regexp_compare_good,good_cmp_thm]) 1021QED 1022 1023Theorem mem_foldrWithKey_mono[local] : 1024 !P fmap x acc. 1025 MEM x acc 1026 ==> 1027 MEM x (foldrWithKey 1028 (\r n nlist. if P r then n::nlist else nlist) 1029 acc fmap) 1030Proof 1031 gen_tac 1032 >> Induct 1033 >> RW_TAC std_ss [foldrWithKey_def] 1034 >> metis_tac [MEM] 1035QED 1036 1037Theorem mem_get_acceptsB[local] : 1038 !P fmap r x acc. 1039 P r /\ (lookup regexp_compare r fmap = SOME x) 1040 ==> 1041 MEM x (foldrWithKey (\r n nlist. if P r then n::nlist else nlist) acc fmap) 1042Proof 1043 gen_tac >> Induct 1044 >- rw_tac list_ss [lookup_def,foldrWithKey_def] 1045 >- (rw_tac list_ss [lookup_bin,regexp_compare_eq,foldrWithKey_def] 1046 >> metis_tac [mem_foldrWithKey_mono,MEM]) 1047QED 1048 1049Theorem mem_get_accepts[local] : 1050 !bmap x. 1051 invariant regexp_compare bmap 1052 ==> 1053 (MEM x (get_accepts bmap) 1054 <=> 1055 ?r. nullable r /\ 1056 (lookup regexp_compare r bmap = SOME x)) 1057Proof 1058 rw_tac list_ss [get_accepts_def,mergesort_mem] 1059 >> metis_tac [mem_get_acceptsA,mem_get_acceptsB,MEM] 1060QED 1061 1062(*---------------------------------------------------------------------------*) 1063(* Properties of extend_states *) 1064(*---------------------------------------------------------------------------*) 1065 1066Definition extend_states_inv_def : 1067 extend_states_inv next_state state_map table = 1068 (invariant regexp_compare state_map /\ 1069 fmap_inj regexp_compare state_map /\ 1070 (frange regexp_compare state_map = count next_state) /\ 1071 (!n. MEM n (MAP SND table) ==> n < next_state)) 1072End 1073 1074Theorem extend_states_inv[local] : 1075 !next_state state_map table states next_state' state_map' table'. 1076 (extend_states next_state state_map table states 1077 = (next_state',state_map',table')) /\ 1078 extend_states_inv next_state state_map table 1079 ==> 1080 extend_states_inv next_state' state_map' table' 1081Proof 1082 ho_match_mp_tac extend_states_ind 1083 >> rw [extend_states_def] 1084 >> BasicProvers.EVERY_CASE_TAC 1085 >> fs [] 1086 >> FIRST_X_ASSUM match_mp_tac 1087 >> rw [] 1088 >> fs [extend_states_inv_def, frange_def, 1089 MEM_MAP,EXTENSION,GSYM LEFT_FORALL_IMP_THM] 1090 >> `good_cmp regexp_compare /\ eq_cmp regexp_compare` 1091 by metis_tac [eq_cmp_regexp_compare,regexp_compare_good] 1092 >> rw_tac set_ss [] 1093 >- metis_tac [insert_thm] 1094 >- (fs [fmap_inj_def,lookup_insert_thm] 1095 >> rw [regexp_compare_eq,fdom_insert] 1096 >> metis_tac [DECIDE ``x < (x:num) ==> F``,eq_cmp_def]) 1097 >- (rw [lookup_insert_thm,regexp_compare_eq,EQ_IMP_THM] 1098 >- (pop_assum mp_tac >> rw_tac set_ss [] 1099 >> metis_tac [DECIDE ``x < x + 1n``,LESS_TRANS]) 1100 >- (`x < next_state \/ (x=next_state)` by DECIDE_TAC 1101 >> metis_tac [NOT_SOME_NONE])) 1102 >- rw_tac set_ss [] 1103 >- metis_tac [LESS_TRANS,DECIDE ``x < x+1n``,IN_DEF] 1104 >- (rw_tac set_ss [] >> metis_tac[]) 1105 >- metis_tac [LESS_TRANS,DECIDE ``x < x+1n``,IN_DEF] 1106QED 1107 1108 1109Theorem extend_states_thm : 1110 !next_state state_map table states next_state' state_map' table'. 1111 (extend_states next_state state_map table states 1112 = (next_state',state_map',table')) /\ 1113 invariant regexp_compare state_map 1114 ==> 1115 (fdom regexp_compare state_map' = 1116 (fdom regexp_compare state_map UNION set (MAP SND states))) /\ 1117 submap regexp_compare state_map state_map' /\ 1118 next_state <= next_state' /\ 1119 (table' = REVERSE 1120 (MAP (\(c,r). (c, fapply regexp_compare state_map' r)) states) 1121 ++ table) /\ 1122 invariant regexp_compare state_map' 1123Proof 1124 `eq_cmp regexp_compare` by metis_tac [eq_cmp_regexp_compare] 1125 >> ho_match_mp_tac extend_states_ind 1126 >> rw [extend_states_def] 1127 >> BasicProvers.EVERY_CASE_TAC 1128 >> `invariant regexp_compare (insert regexp_compare r' next_state state_map)` 1129 by metis_tac [insert_thm,eq_cmp_def] 1130 >> fs [submap_id] 1131 >> res_tac 1132 >- (rw_tac set_ss [fdom_insert] >> metis_tac[]) 1133 >- (`r' IN fdom regexp_compare state_map` by rw [fdom_def] 1134 >> rw_tac set_ss [IN_SET_UNION]) 1135 >- (match_mp_tac submap_insert >> rw [fdom_def] >> metis_tac [NOT_SOME_NONE]) 1136 >- decide_tac 1137 >- (pat_elim `$! M` >> rw[] 1138 >> qpat_x_assum `submap _ __ ___` mp_tac 1139 >> rw_tac set_ss [submap_def,fapply_def] 1140 >> pop_assum (mp_tac o Q.SPEC `r'`) 1141 >> rw [fdom_insert] 1142 >> pop_assum mp_tac 1143 >> `good_cmp regexp_compare` by metis_tac [regexp_compare_good] 1144 >> rw [lookup_insert_thm,regexp_compare_eq] 1145 >> metis_tac [THE_DEF]) 1146 >- (pat_elim `$! M` >> rw[] 1147 >> qpat_x_assum `submap _ __ ___` mp_tac 1148 >> rw_tac set_ss [submap_def,fapply_def] 1149 >> pop_assum (mp_tac o Q.SPEC `r'`) 1150 >> rw [fdom_def] 1151 >> metis_tac [THE_DEF]) 1152QED 1153 1154(*---------------------------------------------------------------------------*) 1155(* What it means to be a good table *) 1156(*---------------------------------------------------------------------------*) 1157 1158Definition good_table_def : 1159 good_table state_map table = 1160 (ALL_DISTINCT (MAP FST table) /\ 1161 (!n table2. 1162 (ALOOKUP table n = SOME table2) 1163 ==> 1164 (set (MAP FST table2) = set ALPHABET)) /\ 1165 (!n c n' table2. 1166 (ALOOKUP table n = SOME table2) /\ 1167 (ALOOKUP table2 c = SOME n') 1168 ==> 1169 MEM c ALPHABET /\ 1170 ?r r'. 1171 (lookup regexp_compare r state_map = SOME n) /\ 1172 (lookup regexp_compare r' state_map = SOME n') /\ 1173 (r' = smart_deriv c r))) 1174End 1175 1176(*---------------------------------------------------------------------------*) 1177(* Some abbreviations *) 1178(*---------------------------------------------------------------------------*) 1179 1180Definition invar_def : invar = invariant regexp_compare 1181End 1182 1183Definition dom_def : dom = fdom regexp_compare 1184End 1185 1186Definition range_def : range = frange regexp_compare 1187End 1188 1189Definition union_def : union = ounion regexp_compare 1190End 1191 1192Definition set_def : set = oset regexp_compare 1193End 1194 1195Definition image_def : image = oimage regexp_compare 1196End 1197 1198Definition apply_def : apply = fapply regexp_compare 1199End 1200 1201Definition set_of_def : set_of oset = fapply regexp_compare 1202End 1203 1204Theorem dom_union[local] : 1205 !a b. 1206 invariant regexp_compare a /\ invariant regexp_compare b 1207 ==> 1208 (dom (union a b) = (dom a) UNION (dom b)) 1209Proof 1210 metis_tac [regexp_compare_good,fdom_ounion,dom_def,union_def] 1211QED 1212 1213(*---------------------------------------------------------------------------*) 1214(* Invariants on regexp compilation *) 1215(*---------------------------------------------------------------------------*) 1216 1217Definition Brz_invariant_def : 1218 Brz_invariant seen todo (next_state,state_map,table) <=> 1219 invar todo /\ invar state_map /\ invar seen /\ 1220 (!r. oin regexp_compare r todo ==> is_normalized r) /\ 1221 (!r. mem_regexp r seen ==> is_normalized r) /\ 1222 (!r. r IN dom state_map ==> is_normalized r) /\ 1223 (dom (union seen todo) = dom state_map) /\ 1224 (range state_map = count next_state) /\ 1225 (set (MAP FST table) = fdom num_cmp (oimage num_cmp (apply state_map) seen)) /\ 1226 fmap_inj regexp_compare state_map /\ 1227 good_table state_map table 1228End 1229 1230Theorem triv_lem[local] : 1231 !s1 s2. (s1 = s2) ==> ((s1 UNION s) = (s2 UNION s)) 1232Proof 1233 SET_TAC[] 1234QED 1235 1236Theorem Brz_inv_pres[local] : 1237 !todo1 x todos work seen next_state state_map table. 1238 ~null work /\ 1239 (deleteFindMin work = ((todo1,x),todos)) /\ 1240 Brz_invariant seen work (next_state,state_map,table) /\ 1241 ~mem_regexp todo1 seen 1242 ==> 1243 Brz_invariant (insert_regexp todo1 seen) 1244 (FOLDL (\work a. insert_regexp (SND a) work) todos (transitions todo1)) 1245 (build_table (transitions todo1) 1246 todo1 (next_state,state_map,table)) 1247Proof 1248 rw_tac list_ss [good_table_def,insert_regexp_def, invar_def, 1249 Brz_invariant_def, build_table_def, transitions_def,set_def] >> 1250 imp_res_tac extend_states_thm 1251 >> `todo1 IN dom work` 1252 by metis_tac [deleteFindMin_thm,EXTENSION,IN_UNION,IN_INSERT,dom_def] 1253 >> `todo1 IN dom state_map` by metis_tac [dom_union,IN_UNION] 1254 >> `todo1 IN dom state_map'` by metis_tac [submap_def,dom_def] 1255 >> pop_assum (strip_assume_tac o SIMP_RULE set_ss [dom_def, fdom_def]) 1256 >> rw [] 1257 >> rw [Brz_invariant_def, good_table_def,invar_def] 1258 (* 13 subgoals *) 1259 >- metis_tac [deleteFindMin_thm,invariant_foldl] 1260 >- metis_tac [insert_thm,regexp_compare_good] 1261 >- (`MEM r (MAP SND (MAP (\c. (c,smart_deriv c todo1)) ALPHABET)) \/ 1262 oin regexp_compare r todos` 1263 by metis_tac [oin_foldl_insert,deleteFindMin_thm] 1264 >- (fs [MEM_MAP,MAP_MAP_o,combinTheory.o_DEF] 1265 >> match_mp_tac smart_deriv_normalized 1266 >> `todo1 IN dom work` 1267 by metis_tac [deleteFindMin_thm,dom_def] 1268 >> `oin regexp_compare todo1 work` 1269 by metis_tac [oin_fdom,dom_def] 1270 >> metis_tac []) 1271 >- (fs [oin_fdom] 1272 >> `FDOM (to_fmap regexp_compare todos) = 1273 FDOM (DRESTRICT (to_fmap regexp_compare work) 1274 (FDOM (to_fmap regexp_compare work) DELETE 1275 key_set regexp_compare todo1))` 1276 by metis_tac [deleteFindMin,regexp_compare_good] 1277 >> fs [FDOM_DRESTRICT,INTER_DELETE] 1278 >> `{r} IN FDOM (to_fmap regexp_compare todos)` 1279 by metis_tac[SING_IN_FDOM,eq_cmp_regexp_compare,deleteFindMin_thm] 1280 >> rfs[] 1281 >> metis_tac [SING_IN_FDOM,eq_cmp_regexp_compare,deleteFindMin_thm]) 1282 ) 1283 >- metis_tac [member_insert,mem_regexp_def,eq_cmp_regexp_compare,insert_thm,eq_cmp_def] 1284 >- (`r IN fdom regexp_compare state_map \/ 1285 r IN set (MAP SND (MAP (\c. (c,smart_deriv c todo1)) ALPHABET))` 1286 by metis_tac [EXTENSION,dom_def,IN_UNION] 1287 >- metis_tac [dom_def] 1288 >- (fs [MAP_MAP_o,MEM_MAP] >> metis_tac [smart_deriv_normalized, dom_def])) 1289 >- (fs [GSYM dom_def] >> qpat_x_assum `dom _ = dom __` (assume_tac o SYM) 1290 >> rw [GSYM set_def] 1291 >> `invariant regexp_compare (insert regexp_compare todo1 () seen)` 1292 by metis_tac [insert_thm,regexp_compare_good] 1293 >> `dom (insert regexp_compare todo1 () seen) = 1294 dom (set [todo1]) UNION dom seen` 1295 by (rw [dom_def,fdom_insert,eq_cmp_regexp_compare,Once INSERT_SING_UNION] 1296 >> match_mp_tac triv_lem 1297 >> rw_tac bool_ss [EXTENSION,set_def] 1298 >> rw [in_dom_oset,eq_cmp_regexp_compare]) 1299 >> `invariant regexp_compare 1300 (FOLDL (\work' a. insert regexp_compare (SND a) () work') todos 1301 (MAP (\c. (c,smart_deriv c todo1)) ALPHABET))` 1302 by metis_tac [invariant_foldl,deleteFindMin_thm] 1303 >> rw [dom_union] 1304 >> rw [EXTENSION] 1305 >> `invariant regexp_compare todos` by metis_tac [deleteFindMin_thm] 1306 >> rw [oin_foldl_insert |> SIMP_RULE std_ss [oin_fdom,GSYM dom_def]] 1307 >> rw [AC DISJ_COMM DISJ_ASSOC] 1308 >> qsuff_tac `x IN dom work <=> (x IN dom (set [todo1]) \/ x IN dom todos)` 1309 >- metis_tac[] 1310 >> `dom work = {todo1} UNION dom todos` 1311 by metis_tac [deleteFindMin_thm,dom_def] 1312 >> qsuff_tac `{todo1} = dom (set [todo1])` 1313 >- metis_tac[IN_UNION,EXTENSION] 1314 >> simp_tac list_ss [dom_def,set_def,dom_oset_lem,eq_cmp_regexp_compare]) 1315 >- (`extend_states_inv next_state state_map ([]:(num,num) alist)` 1316 by (rw [extend_states_inv_def] >> metis_tac [range_def]) 1317 >> imp_res_tac extend_states_inv 1318 >> fs [extend_states_inv_def, count_def, EXTENSION, range_def]) 1319 >- (`todo1 IN dom state_map'` by (fs[submap_def] >> metis_tac[dom_def]) 1320 >> `eq_cmp num_cmp /\ eq_cmp regexp_compare` 1321 by metis_tac[eq_cmp_regexp_compare,num_cmp_good,num_cmp_antisym,eq_cmp_def] 1322 >> rw [GSYM oinsert_def,fdom_oimage_insert] 1323 >> `apply state_map' todo1 = x'` by rw [apply_def,fapply_def] 1324 >> pop_assum SUBST_ALL_TAC 1325 >> AP_TERM_TAC 1326 >> `dom seen SUBSET dom state_map` by 1327 (qpat_x_assum `dom _ = dom state_map` (SUBST_ALL_TAC o SYM) 1328 >> metis_tac [invariant_oset, eq_cmp_def,dom_union,SUBSET_UNION]) 1329 >> rw[fdom_oimage_inst,SET_EQ_THM,EQ_IMP_THM,oin_fdom] 1330 >> Q.EXISTS_TAC `x'''` >> rw[] 1331 >> `x''' IN dom state_map` by metis_tac [SUBSET_DEF,dom_def] 1332 >> `(x''' IN dom state_map') /\ 1333 (lookup regexp_compare x''' state_map = lookup regexp_compare x''' state_map')` 1334 by metis_tac [submap_def,dom_def] 1335 >> rw [apply_def,fapply_def]) 1336 >- (`extend_states_inv next_state state_map ([]:(num,num) alist)` 1337 by (rw [extend_states_inv_def] >> metis_tac [range_def]) 1338 >> imp_res_tac extend_states_inv 1339 >> pop_assum mp_tac 1340 >> rw [extend_states_inv_def]) 1341 >- (rw_tac set_ss [fdom_oimage_inst,GSYM IMP_DISJ_THM,oneTheory.one, 1342 oin_fdom,fdom_def,apply_def,fapply_def] 1343 >> strip_tac 1344 >> `fmap_inj regexp_compare state_map'` 1345 by (`extend_states_inv next_state state_map ([]:(num,num) alist)` 1346 by (rw [extend_states_inv_def] >> metis_tac [range_def]) 1347 >> imp_res_tac extend_states_inv 1348 >> pop_assum mp_tac 1349 >> rw [extend_states_inv_def]) 1350 >> `dom seen SUBSET dom state_map` by 1351 (`eq_cmp num_cmp /\ eq_cmp regexp_compare` 1352 by metis_tac[eq_cmp_regexp_compare, 1353 num_cmp_good,num_cmp_antisym,eq_cmp_def] 1354 >> qpat_x_assum `dom _ = dom state_map` (SUBST_ALL_TAC o SYM) 1355 >> metis_tac [invariant_oset, eq_cmp_def,dom_union,SUBSET_UNION]) 1356 >> pop_assum mp_tac 1357 >> rw_tac set_ss [dom_def, fdom_def, member_iff_lookup,SUBSET_DEF,oneTheory.one] 1358 >> Q.EXISTS_TAC `x''` >> rw[] 1359 >> strip_tac 1360 >> `x'' IN fdom regexp_compare state_map` by fs [SUBSET_DEF,dom_def, fdom_def] 1361 >> `(x'' IN fdom regexp_compare state_map') /\ 1362 (lookup regexp_compare x'' state_map = lookup regexp_compare x'' state_map')` 1363 by (fs [submap_def,fdom_def, member_iff_lookup] >> metis_tac []) 1364 >> fs [] 1365 >> `x'' <> todo1` by (fs[mem_regexp_def,member_iff_lookup] >> metis_tac[]) 1366 >> metis_tac [fmap_inj_def,eq_cmp_regexp_compare, eq_cmp_def]) 1367 >- (pop_assum mp_tac 1368 >> rw[MAP_MAP_o,combinTheory.o_DEF] 1369 >> fs[MAP_REVERSE,MAP_MAP_o,combinTheory.o_DEF] 1370 >> metis_tac[]) 1371 >- (qpat_x_assum `(if _ then __ else ___) = ____` mp_tac 1372 >> rw[MAP_MAP_o,combinTheory.o_DEF] 1373 >- (imp_res_tac alistTheory.ALOOKUP_MEM 1374 >> fs [MEM_REVERSE,MEM_MAP]) 1375 >- metis_tac[]) 1376 >- (qpat_x_assum `(if _ then __ else ___) = ____` mp_tac 1377 >> rw[MAP_MAP_o,combinTheory.o_DEF] 1378 >- (qexists_tac `todo1` >> rw[] 1379 >> imp_res_tac alistTheory.ALOOKUP_MEM 1380 >> pop_assum mp_tac 1381 >> rw_tac set_ss [MEM_REVERSE,MEM_MAP,fapply_def] 1382 >> `smart_deriv c todo1 IN fdom regexp_compare state_map'` 1383 by (rw [MAP_MAP_o,combinTheory.o_DEF,MEM_MAP] 1384 >> disj2_tac 1385 >> qexists_tac `c` >> rw[] 1386 >> metis_tac [IN_DEF]) 1387 >> fs [fdom_def, member_iff_lookup]) 1388 >- (fs[submap_def, fdom_def, member_iff_lookup] >> metis_tac[])) 1389QED 1390 1391Theorem Brz_inv_thm[local] : 1392 !seen work acc. 1393 dom_Brz seen work acc 1394 ==> 1395 !seen' acc'. 1396 Brz_invariant seen work acc /\ 1397 (Brzozowski seen work acc = (seen',acc')) 1398 ==> 1399 Brz_invariant seen' balanced_map$empty acc' 1400Proof 1401 simp_tac std_ss [empty_def] 1402 >> ho_match_mp_tac Brzozowski_ind 1403 >> rw [] 1404 >> qpat_x_assum `Brzozowski _ __ ___ = _4` mp_tac 1405 >> rw [Brzozowski_eqns] 1406 >- (Cases_on `work` >> fs[null_def]) 1407 >- (`?r work'. deleteFindMin work = ((r,()),work')` 1408 by metis_tac [ABS_PAIR_THM,oneTheory.one] 1409 >> fs[] 1410 >> BasicProvers.NORM_TAC set_ss [] 1411 >> first_x_assum match_mp_tac 1412 >> rw[] 1413 >- (qpat_x_assum `Brz_invariant _ _ _` mp_tac 1414 >> `?next_state state_map table. 1415 acc = (next_state,state_map,table)` by metis_tac[ABS_PAIR_THM] 1416 >> pop_assum subst_all_tac 1417 >> rw [Brz_invariant_def] 1418 >- metis_tac [deleteFindMin_thm,invar_def] 1419 >- metis_tac[deleteFindMin_thm,invar_def, 1420 oin_fdom,IN_UNION,IN_INSERT,EXTENSION] 1421 >- (qpat_x_assum `dom (union seen work) = dom state_map` 1422 (subst_all_tac o SYM) 1423 >> fs [invar_def,dom_def,union_def, mem_regexp_def,GSYM oin_def] 1424 >> `invariant regexp_compare work' /\ 1425 fdom regexp_compare work = 1426 {r} UNION fdom regexp_compare work'` 1427 by metis_tac[deleteFindMin_thm,invar_def] 1428 >> fs [GSYM oin_fdom,EXTENSION,oin_ounion,good_oset_def, 1429 regexp_compare_good] 1430 >> metis_tac[])) 1431 >- metis_tac [Brz_inv_pres,ABS_PAIR_THM]) 1432QED 1433 1434 1435Theorem Brz_mono[local] : 1436 !seen work acc. 1437 dom_Brz seen work acc 1438 ==> 1439 !seen' acc'. 1440 Brz_invariant seen work acc /\ 1441 (Brzozowski seen work acc = (seen',acc')) 1442 ==> 1443 submap regexp_compare (FST (SND acc)) (FST(SND acc')) 1444Proof 1445 ho_match_mp_tac Brzozowski_ind 1446 >> rw[] 1447 >> qpat_x_assum `Brzozowski _ __ ___ = _4` mp_tac 1448 >> rw [Brzozowski_eqns] 1449 >> fs [] 1450 >- metis_tac [submap_id] 1451 >- (`?r work'. deleteFindMin work = ((r,()),work')` 1452 by metis_tac [ABS_PAIR_THM,oneTheory.one] 1453 >> fs[] 1454 >> BasicProvers.NORM_TAC set_ss [] 1455 >- (first_assum match_mp_tac 1456 >> qexists_tac `seen'` >> rw[] 1457 >> qpat_x_assum `Brz_invariant seen work acc` mp_tac 1458 >> `?next_state state_map table. 1459 acc = (next_state,state_map,table)` by metis_tac[ABS_PAIR_THM] 1460 >> pop_assum subst_all_tac 1461 >> rw [Brz_invariant_def,invar_def] 1462 >- metis_tac [deleteFindMin_thm] 1463 >- metis_tac[deleteFindMin_thm,invar_def, 1464 oin_fdom,IN_UNION,IN_INSERT,EXTENSION] 1465 >- (qpat_x_assum `dom (union seen work) = dom state_map` 1466 (subst_all_tac o SYM) 1467 >> fs [invar_def,dom_def,union_def, 1468 mem_regexp_def,GSYM oin_def] 1469 >> `invariant regexp_compare work' /\ 1470 fdom regexp_compare work = 1471 {r} UNION fdom regexp_compare work'` 1472 by metis_tac[deleteFindMin_thm,invar_def] 1473 >> fs [GSYM oin_fdom,EXTENSION,oin_ounion, 1474 good_oset_def,regexp_compare_good] 1475 >> metis_tac[]) 1476 ) 1477 >- (`?next_state state_map table. 1478 acc = (next_state,state_map,table)` by metis_tac[ABS_PAIR_THM] 1479 >> pop_assum subst_all_tac 1480 >> imp_res_tac Brz_inv_pres 1481 >> fs [] 1482 >> match_mp_tac submap_trans 1483 >> qexists_tac 1484 `FST (SND (build_table (transitions r) r 1485 (next_state,state_map,table)))` 1486 >> fs[] 1487 >> rw[build_table_def,LET_THM] 1488 >> `?next_state' state_map' trans'. 1489 extend_states next_state state_map [] (transitions r) 1490 = (next_state',state_map', trans')` 1491 by metis_tac[ABS_PAIR_THM] 1492 >> rw[] 1493 >> `invariant regexp_compare state_map` by metis_tac [Brz_invariant_def,invar_def] 1494 >> imp_res_tac extend_states_thm 1495 >> CASE_TAC 1496 >> rw[] 1497 >> match_mp_tac submap_mono 1498 >> rw[eq_cmp_regexp_compare] 1499 >> strip_tac 1500 >> `r IN fdom regexp_compare state_map'` by metis_tac[submap_def] 1501 >> pop_assum mp_tac 1502 >> simp_tac set_ss [fdom_def] 1503 >> metis_tac[NOT_SOME_NONE])) 1504QED 1505 1506Theorem Brz_invariant_final[local] : 1507 !seen next_state state_map table. 1508 Brz_invariant seen empty (next_state,state_map,table) 1509 ==> 1510 (next_state = LENGTH table) /\ 1511 PERM (MAP FST table) (COUNT_LIST (LENGTH table)) /\ 1512 (!x. MEM x (get_accepts state_map) ==> x < LENGTH table) /\ 1513 (!n l c. (ALOOKUP table n = SOME l) /\ (?x. ALOOKUP l c = SOME x) ==> MEM c ALPHABET) 1514Proof 1515 simp [Brz_invariant_def,invar_def,set_def,ounion_oempty,union_def] >> 1516 rpt gen_tac >> strip_tac >> conj_asm1_tac 1517 >- (`!x. x IN set (MAP FST table) ==> x < next_state` 1518 by (fs [count_def, EXTENSION, range_def, frange_def,dom_def] 1519 >> rw [fdom_oimage_inst,apply_def,oin_fdom] 1520 >> fs [fdom_def,fapply_def] 1521 >> metis_tac [THE_DEF,ounion_oempty,oempty_def]) 1522 >> 1523 `LENGTH table = CARD (set (MAP FST table))` 1524 by metis_tac [LENGTH_MAP,ALL_DISTINCT_CARD_LIST_TO_SET, good_table_def] 1525 >> 1526 `fdom num_cmp (oimage num_cmp (apply state_map) seen) = range state_map` 1527 by (simp_tac set_ss [range_def, frange_def] >> 1528 qpat_x_assum `dom _ = dom __` mp_tac >> 1529 rw [fdom_oimage_inst,apply_def, oin_fdom,fapply_def, dom_def] >> 1530 fs[fdom_def,ounion_oempty, GSYM oempty_def,EXTENSION] >> 1531 metis_tac[THE_DEF]) 1532 >> rw []) 1533 >> conj_asm1_tac 1534 >- (match_mp_tac PERM_ALL_DISTINCT 1535 >> rw [all_distinct_count_list, MEM_COUNT_LIST] 1536 >- fs [good_table_def] 1537 >- (`fdom num_cmp (oimage num_cmp (apply state_map) seen) = range state_map` 1538 by (simp_tac set_ss [range_def, frange_def] >> 1539 qpat_x_assum `dom _ = dom __` mp_tac >> 1540 simp_tac set_ss [ounion_oempty, GSYM oempty_def] >> 1541 rw [fdom_oimage_inst,apply_def, oin_fdom,fapply_def, dom_def] >> 1542 fs [fdom_def,EXTENSION] >> metis_tac[THE_DEF]) 1543 >> qpat_x_assum `set (MAP FST table) = _` (subst_all_tac o SYM) 1544 >> rw[])) 1545 >> conj_tac 1546 >- (rw [mem_get_accepts] 1547 >> `x IN range state_map` 1548 by (simp_tac set_ss [range_def, frange_def] >> metis_tac[]) 1549 >> rfs [count_def]) 1550 >- (rw [] >> fs [good_table_def] >> metis_tac []) 1551QED 1552 1553 1554(*---------------------------------------------------------------------------*) 1555(* table_lang parallels the definition of exec_dfa *) 1556(*---------------------------------------------------------------------------*) 1557 1558Definition table_lang_def : 1559 (table_lang final_states table n [] = MEM n final_states) /\ 1560 (table_lang final_states table n (c::t) = 1561 case ALOOKUP table n of 1562 | NONE => F 1563 | SOME table2 => 1564 case ALOOKUP table2 c of 1565 | NONE => F 1566 | SOME n' => table_lang final_states table n' t) 1567End 1568 1569Theorem table_lang_correct[local] : 1570 !finals table state_map. 1571 fmap_inj regexp_compare state_map /\ 1572 good_table state_map table /\ 1573 (set (MAP FST table) = frange regexp_compare state_map) /\ 1574 (!n r. (lookup regexp_compare r state_map = SOME n) ==> (MEM n finals <=> nullable r)) 1575 ==> 1576 !n r s. 1577 (!c. MEM c s ==> MEM c ALPHABET) /\ 1578 (lookup regexp_compare r state_map = SOME n) 1579 ==> 1580 (table_lang finals table n s <=> smart_deriv_matches r (MAP CHR s)) 1581Proof 1582 rpt gen_tac >> 1583 strip_tac >> 1584 Induct_on `s` >> 1585 rw [table_lang_def, smart_deriv_matches_def] >> 1586 fs [good_table_def] >> 1587 BasicProvers.EVERY_CASE_TAC >> 1588 rw [] 1589 >- (imp_res_tac alistTheory.ALOOKUP_NONE 1590 >> fs [EXTENSION] 1591 >> `n NOTIN frange regexp_compare state_map` by metis_tac[] 1592 >> fs [frange_def]) 1593 >- metis_tac [alistTheory.ALOOKUP_NONE] 1594 >- (`?r'.(lookup regexp_compare r' state_map = SOME n) /\ 1595 (lookup regexp_compare (smart_deriv h r') state_map = 1596 SOME x')` by metis_tac [] 1597 >> `table_lang finals table x' s <=> 1598 smart_deriv_matches (smart_deriv h r') (MAP CHR s)` 1599 by metis_tac[] 1600 >> pop_assum SUBST_ALL_TAC 1601 >> `r' = r` 1602 by (qpat_x_assum `fmap_inj _ __` mp_tac 1603 >> rw_tac set_ss [fmap_inj_def,fdom_def,regexp_compare_eq]) 1604 >> rw [] 1605 >> metis_tac [ORD_CHR_lem,mem_alphabet]) 1606QED 1607 1608Theorem vec_lang_lem1[local] : 1609 !(n:num). 1610 MEM n finals_list <=> (ALOOKUP (MAP (\x. (x,T)) finals_list) n = SOME T) 1611Proof 1612 rw [EQ_IMP_THM] 1613 >- rw [alistTheory.ALOOKUP_TABULATE] 1614 >- (`MEM (n,T) (MAP (\x. (x,T)) finals_list)` by METIS_TAC [alistTheory.ALOOKUP_MEM] 1615 >> fs[MEM_MAP]) 1616QED 1617 1618Theorem vec_lang_lem2 = 1619 alistTheory.alookup_stable_sorted 1620 |> Q.INST_TYPE [`:'a` |-> `:num`, `:'b` |-> `:(num, 'a) alist`] 1621 |> Q.SPECL [`$<=`, `mergesort`, `n`, `table`] 1622 |> SIMP_RULE (srw_ss()++ARITH_ss) [transitive_def, total_def, mergesort_STABLE_SORT]; 1623 1624Theorem vec_lang_lem3 = 1625 alistTheory.alookup_stable_sorted 1626 |> Q.INST_TYPE [`:'a` |-> `:num`, `:'b` |-> `:'a option`] 1627 |> Q.SPECL [`$<=`, `mergesort`, `n`, `MAP (\(c,n). (c,SOME n)) table2`] 1628 |> SIMP_RULE (srw_ss()++ARITH_ss) 1629 [transitive_def, total_def, mergesort_STABLE_SORT, stringTheory.char_le_def]; 1630 1631Theorem vec_lang_lem4[local] : 1632 !l x. ALOOKUP (MAP (\(c,n). (c,SOME n)) l) x = OPTION_MAP SOME (ALOOKUP l x) 1633Proof 1634 Induct_on `l` >> 1635 rw [] >> 1636 PairCases_on `h` >> 1637 rw [] >> 1638 fs [stringTheory.ORD_11] 1639QED 1640 1641 1642Theorem table_to_vec_thm[local] : 1643 !table final_states max_char max_state table' final_states'. 1644 (max_char = alphabet_size) /\ 1645 SORTED $<= final_states /\ 1646 (!x. MEM x final_states ==> x < max_state) /\ 1647 (!n l c. (ALOOKUP table n = SOME l) /\ 1648 (?x. ALOOKUP l c = SOME x) ==> c < max_char) /\ 1649 PERM (MAP FST table) (COUNT_LIST (LENGTH table)) /\ 1650 (table_to_vectors table = table') /\ 1651 (accepts_to_vector final_states max_state = final_states') 1652 ==> 1653 (LENGTH table' = LENGTH table) /\ 1654 (LENGTH final_states' = max_state) /\ 1655 (!n. MEM n final_states <=> n < LENGTH final_states' /\ EL n final_states') /\ 1656 (!n. (?l. ALOOKUP table n = SOME l) <=> n < LENGTH table') /\ 1657 (!n l. n < LENGTH table' ==> (LENGTH (EL n table') = max_char)) /\ 1658 (!n c x. 1659 (?l. (ALOOKUP table n = SOME l) /\ (ALOOKUP l c = SOME x)) <=> 1660 n < LENGTH table' /\ 1661 c < LENGTH (EL n table') /\ (EL c (EL n table') = SOME x)) 1662Proof 1663 simp [good_table_def, table_to_vectors_def,accepts_to_vector_def] >> 1664 rpt gen_tac >> 1665 strip_tac >> 1666 qabbrev_tac `sorted_table = mergesort (inv_image $<= FST) table` >> 1667 `LENGTH sorted_table = LENGTH table` by metis_tac [mergesort_perm, PERM_LENGTH] >> 1668 `SORTS mergesort (inv_image $<= (FST : num # (num,'a) alist -> num))` 1669 by metis_tac [mergesort_STABLE_SORT, STABLE_DEF, 1670 leq_thm, total_inv_image, transitive_inv_image] >> 1671 `MAP FST sorted_table = COUNT_LIST (LENGTH table)` 1672 by (match_mp_tac sorted_perm_count_list >> 1673 fs [SORTS_DEF] >> 1674 rw [] >> 1675 metis_tac [PERM_TRANS, PERM_SYM, PERM_MAP]) >> 1676 simp [] >> 1677 conj_tac 1678 >- metis_tac[mergesort_perm,PERM_LENGTH] >> 1679 conj_tac 1680 >- (`SORTED $<= (MAP FST (MAP (\x.(x,T)) final_states))` 1681 by rw [MAP_MAP_o, combinTheory.o_DEF] 1682 >> metis_tac [alist_to_vec_thm]) >> 1683 conj_tac 1684 >- (`SORTED $<= (MAP FST (MAP (\x.(x,T)) final_states))` 1685 by rw [MAP_MAP_o, combinTheory.o_DEF] 1686 >> `LENGTH (alist_to_vec (MAP (\x. (x,T)) final_states) 1687 F max_state max_state) = max_state` 1688 by METIS_TAC [alist_to_vec_thm] 1689 >> POP_ASSUM SUBST_ALL_TAC 1690 >> RW_TAC std_ss [EQ_IMP_THM] 1691 >- (`n < max_state` by METIS_TAC [] >> 1692 `ALOOKUP (MAP (\x. (x,T)) final_states) n = SOME T` 1693 by METIS_TAC [vec_lang_lem1] >> METIS_TAC [alist_to_vec_thm]) 1694 >- (CCONTR_TAC >> fs [] 1695 >> Cases_on `ALOOKUP (MAP (\x. (x,T)) final_states) n` 1696 >- (metis_tac [alist_to_vec_thm, vec_lang_lem1]) 1697 >- (NTAC 2 (POP_ASSUM MP_TAC) >> rw [vec_lang_lem1] 1698 >> DISCH_TAC >> fs[] >> rw[] >> metis_tac [alist_to_vec_thm]))) 1699 >> 1700 conj_asm1_tac 1701 >- (rw [] >> eq_tac >> rw [] 1702 >- (imp_res_tac alistTheory.ALOOKUP_MEM >> 1703 imp_res_tac MEM_PERM >> 1704 fs [MEM_MAP, MEM_COUNT_LIST] >> 1705 metis_tac [FST,mergesort_perm,PERM_LENGTH]) 1706 >- (`ALOOKUP sorted_table n = SOME (EL n (MAP SND sorted_table))` 1707 by metis_tac [dense_alist_to_vec_thm, 1708 LENGTH_COUNT_LIST,mergesort_perm,PERM_LENGTH] >> 1709 `ALOOKUP table n = SOME (EL n (MAP SND sorted_table))` 1710 by metis_tac [vec_lang_lem2] >> 1711 rw [])) >> 1712 `SORTS mergesort (inv_image $<= (FST : num # 'a option -> num))` 1713 by metis_tac [mergesort_STABLE_SORT, STABLE_DEF, leq_thm, 1714 total_inv_image, transitive_inv_image] >> 1715 `(\a b:num. a <= b) = $<=` by metis_tac [] >> pop_assum SUBST_ALL_TAC >> 1716 simp [EL_MAP,length_mergesort] >> 1717 conj_tac 1718 >- (rw [] >> 1719 `?n' table2'. EL n sorted_table = (n',table2')` 1720 by metis_tac [pair_CASES] >> rw [] >> 1721 qabbrev_tac `sorted_table2 = 1722 mergesort (inv_image $<= FST) (MAP (\(c,n). (c,SOME n)) table2')` >> 1723 qabbrev_tac `table2 = EL n (MAP SND sorted_table)` >> 1724 `SORTED $<= (MAP FST sorted_table2)` 1725 by (fs [SORTS_DEF, sorted_map, leq_thm] >> metis_tac []) >> 1726 metis_tac [alist_to_vec_thm]) 1727 >- (* last conjunct *) 1728 (fs [length_mergesort] >> 1729 rw [] >> eq_tac >> strip_tac >> res_tac >> simp [EL_MAP] >> 1730 `?n' table2'. EL n sorted_table = (n',table2')` by metis_tac [pair_CASES] >> 1731 rw [] >> 1732 qabbrev_tac `sorted_table2 = 1733 mergesort (inv_image $<= FST) (MAP (\(c,n). (c,SOME n)) table2')` >> 1734 qabbrev_tac `table2 = EL n (MAP SND sorted_table)` >> 1735 `SORTED $<= (MAP FST sorted_table2)` 1736 by (fs [SORTS_DEF, sorted_map, leq_thm] >> metis_tac []) 1737 >- metis_tac [alist_to_vec_thm] 1738 >- (imp_res_tac alist_to_vec_thm >> 1739 fs [AND_IMP_INTRO] >> 1740 FIRST_X_ASSUM match_mp_tac >> rw [] >> 1741 `table2' = table2` by (UNABBREV_ALL_TAC >> rw [EL_MAP]) >> rw [] >> 1742 `ALOOKUP sorted_table n = SOME (EL n (MAP SND sorted_table))` 1743 by metis_tac [dense_alist_to_vec_thm, LENGTH_COUNT_LIST] >> 1744 `ALOOKUP table n = SOME (EL n (MAP SND sorted_table))` 1745 by metis_tac [vec_lang_lem2] >> 1746 rw [] >> 1747 fs [] >> 1748 `ALOOKUP (MAP (\(c,n). (c,SOME n)) table2) c = SOME (SOME x)` 1749 by metis_tac [vec_lang_lem4, OPTION_MAP_DEF] >> 1750 fs [Once (GSYM vec_lang_lem3)] >> 1751 metis_tac [NOT_SOME_NONE, alist_to_vec_thm]) 1752 >- (REV_FULL_SIMP_TAC std_ss [EL_MAP] >> 1753 `table2' = table2` by (UNABBREV_ALL_TAC >> rw [EL_MAP]) >> fs [] >> 1754 res_tac >> simp [] >> 1755 Cases_on `ALOOKUP sorted_table2 c` 1756 >- (`ALOOKUP (MAP (\(c,n). (c,SOME n)) table2) c = NONE` 1757 by metis_tac [vec_lang_lem4, OPTION_MAP_DEF, vec_lang_lem3] >> 1758 metis_tac [NOT_SOME_NONE, alist_to_vec_thm]) 1759 >- (`ALOOKUP sorted_table n = SOME (EL n (MAP SND sorted_table))` 1760 by metis_tac [dense_alist_to_vec_thm, LENGTH_COUNT_LIST] >> 1761 `ALOOKUP table n = SOME (EL n (MAP SND sorted_table))` 1762 by metis_tac [vec_lang_lem2] >> 1763 rw [] >> 1764 REV_FULL_SIMP_TAC std_ss [EL_MAP] >> 1765 `SOME x = x'` by metis_tac [SOME_11, alist_to_vec_thm] >> 1766 simp [] >> 1767 qunabbrev_tac `sorted_table2` >> 1768 fs [Once vec_lang_lem3] >> 1769 fs [vec_lang_lem4]))) 1770QED 1771 1772 1773(*---------------------------------------------------------------------------*) 1774(* Well-formedness of final state vector *) 1775(*---------------------------------------------------------------------------*) 1776 1777Definition good_vec_def : 1778 good_vec vec final_states 1779 <=> 1780 (!l c. MEM c ALPHABET /\ MEM l vec ==> c < LENGTH l) /\ 1781 (!n1 c n2 l. n1 < LENGTH vec /\ (EL n1 vec = l) /\ 1782 c < LENGTH (EL n1 vec) /\ 1783 (EL c (EL n1 vec) = SOME n2) ==> n2 < LENGTH vec) /\ 1784 (LENGTH final_states = LENGTH vec) 1785End 1786 1787 1788Theorem Brz_inv_to_good_vec[local] : 1789 !seen next_state state_map table. 1790 Brz_invariant seen empty (next_state,state_map,table) /\ 1791 (table_to_vectors table = vec) /\ 1792 (accepts_to_vector (get_accepts state_map) next_state = final_states) 1793 ==> 1794 (next_state = LENGTH table) /\ 1795 good_vec vec final_states /\ 1796 (!r n. (lookup regexp_compare r state_map = SOME n) ==> n < LENGTH vec) 1797Proof 1798 simp [good_vec_def] 1799 >> rpt gen_tac >> strip_tac 1800 >> imp_res_tac Brz_invariant_final 1801 >> fs [Brz_invariant_def,invar_def, 1802 ounion_oempty,GSYM oempty_def,union_def, set_def, oset_def] 1803 >> 1804 `(LENGTH vec = LENGTH table) /\ 1805 (LENGTH final_states = LENGTH table) /\ 1806 (!n. MEM n (get_accepts state_map) <=> n < LENGTH final_states /\ EL n final_states) /\ 1807 (!n. (?l. ALOOKUP table n = SOME l) <=> n < LENGTH vec) /\ 1808 (!n l. n < LENGTH vec ==> (LENGTH (EL n vec) = alphabet_size)) /\ 1809 (!n c x. 1810 (?l. (ALOOKUP table n = SOME l) /\ (ALOOKUP l c = SOME x)) 1811 <=> 1812 n < LENGTH vec /\ c < LENGTH (EL n vec) /\ 1813 (EL c (EL n vec) = SOME x))` 1814 by 1815 (match_mp_tac table_to_vec_thm 1816 >> rw [sorted_get_accepts] 1817 >> metis_tac [mem_alphabet,alphabet_size_def]) 1818 >> rw [] 1819 >- (fs [MEM_EL] >> rw [] 1820 >> metis_tac [EL_MEM,mem_alphabet,alphabet_size_def]) 1821 >- (`!a. a < LENGTH table <=> MEM a (MAP FST table)` 1822 by metis_tac [MEM_PERM,MEM_COUNT_LIST] 1823 >> rw [fdom_oimage_inst,apply_def, oin_fdom,fapply_def,SYM dom_def] 1824 >> rw [dom_def,fdom_def, member_iff_lookup] 1825 >> metis_tac [good_table_def,THE_DEF]) 1826 >- (`!a. a < LENGTH table <=> MEM a (MAP FST table)` 1827 by metis_tac [MEM_PERM,MEM_COUNT_LIST] 1828 >> rw [fdom_oimage_inst,apply_def, oin_fdom,fapply_def,SYM dom_def] 1829 >> simp_tac set_ss [dom_def,fdom_def, member_iff_lookup] 1830 >> metis_tac [THE_DEF]) 1831QED 1832 1833Theorem compile_regexp_good_vec : 1834 !r state_map table final_states. 1835 dom_Brz empty (singleton (normalize r) ()) 1836 (1,singleton (normalize r) 0,[]) /\ 1837 (compile_regexp r = (state_map, table, final_states)) 1838 ==> 1839 good_vec table final_states /\ 1840 (!r n. (lookup regexp_compare r state_map = SOME n) ==> n < LENGTH table) /\ 1841 normalize r IN fdom regexp_compare state_map 1842Proof 1843 simp [compile_regexp_def] 1844 >> rpt gen_tac >> strip_tac 1845 >> fs [] 1846 >> `?seen next_state state_map table. 1847 Brzozowski empty 1848 (singleton (normalize r) ()) 1849 (1,singleton (normalize r) 0,[]) = (seen,next_state,state_map,table)` 1850 by metis_tac [pair_CASES] 1851 >> fs [] 1852 >> `Brz_invariant empty (singleton (normalize r) ()) (1,singleton (normalize r) 0,[])` 1853 by 1854 (fs [Brz_invariant_def,invar_def,dom_def, normalize_thm, fmap_inj_def, good_table_def, 1855 singleton_thm,empty_def,invariant_def,mem_regexp_def,member_iff_lookup] 1856 >> rw [lookup_def] 1857 >- (fs[singleton_def,oin_def,member_def] 1858 >> BasicProvers.EVERY_CASE_TAC 1859 >- metis_tac[] 1860 >- metis_tac[eq_cmp_regexp_compare,eq_cmp_def,normalize_thm] 1861 >- metis_tac[]) 1862 >- (fs [fdom_def, singleton_def,lookup_def] 1863 >> BasicProvers.EVERY_CASE_TAC 1864 >> rw [] 1865 >> metis_tac [eq_cmp_regexp_compare, eq_cmp_def,normalize_thm]) 1866 >- (rw [union_def,set_def,GSYM empty_def, GSYM oempty_def] >> 1867 rw [EXTENSION,fdom_def, osingleton_def, singleton_def, lookup_def] >> 1868 CASE_TAC) 1869 >- (rw [range_def,frange_def, EXTENSION, singleton_def,lookup_def,EQ_IMP_THM] 1870 >- (BasicProvers.EVERY_CASE_TAC >> rw []) 1871 >- (Q.EXISTS_TAC `normalize r` >> rw [regexp_compare_id] >> decide_tac)) 1872 >- (rw [GSYM empty_def, GSYM oempty_def] 1873 >> rw [fdom_def, lookup_def, empty_def, oempty_def]) 1874 >- (ntac 2 (pop_assum mp_tac) 1875 >> simp_tac set_ss [fdom_def,singleton_def,lookup_def] 1876 >> BasicProvers.EVERY_CASE_TAC 1877 >> rw[] 1878 >> metis_tac [eq_cmp_def, eq_cmp_regexp_compare])) 1879 >> imp_res_tac Brz_inv_thm 1880 >> imp_res_tac Brz_mono 1881 >> imp_res_tac Brz_inv_to_good_vec 1882 >> fs [] 1883 >> rw [] 1884 >- metis_tac [] 1885 >- (`normalize r IN fdom regexp_compare (singleton (normalize r) 0)` 1886 by rw [submap_def,fdom_def,singleton_def,lookup_def,regexp_compare_id] 1887 >> metis_tac [submap_def]) 1888QED 1889 1890Theorem vec_lang_correct : 1891 !table final_states max_char vec final_states'. 1892 (max_char = alphabet_size) /\ 1893 SORTED $<= final_states /\ 1894 (!x. MEM x final_states ==> x < LENGTH table) /\ 1895 (!n l c. (ALOOKUP table n = SOME l) /\ (?x. ALOOKUP l c = SOME x) ==> c < max_char) /\ 1896 (PERM (MAP FST table) (COUNT_LIST (LENGTH table))) /\ 1897 (!n'. n' IN BIGUNION (IMAGE alist_range (alist_range table)) ==> n' < LENGTH table) /\ 1898 (table_to_vectors table = vec) /\ 1899 (accepts_to_vector final_states (LENGTH table) = final_states') 1900 ==> 1901 !n s. (!c. MEM c s ==> c < max_char) /\ n < LENGTH table 1902 ==> 1903 (table_lang final_states table n s 1904 <=> 1905 exec_dfa (fromList final_states') 1906 (fromList (MAP fromList vec)) 1907 n 1908 (MAP CHR s)) 1909Proof 1910 rpt gen_tac 1911 >> strip_tac 1912 >> `(LENGTH vec = LENGTH table) /\ 1913 (LENGTH final_states' = LENGTH table) /\ 1914 (!n. MEM n final_states <=> n < LENGTH final_states' /\ EL n final_states') /\ 1915 (!n. (?l. ALOOKUP table n = SOME l) <=> n < LENGTH vec) /\ 1916 (!n l. n < LENGTH vec ==> (LENGTH (EL n vec) = max_char)) /\ 1917 (!n c x. 1918 (?l. (ALOOKUP table n = SOME l) /\ (ALOOKUP l c = SOME x)) 1919 <=> 1920 n < LENGTH vec /\ c < LENGTH (EL n vec) /\ 1921 (EL c (EL n vec) = SOME x))` 1922 by (match_mp_tac table_to_vec_thm 1923 >> rw [] 1924 >> metis_tac []) 1925 >> Induct_on `s` 1926 >> rw [table_lang_def, exec_dfa_thm,fromList_Vector,sub_def] 1927 >> `h < alphabet_size` by metis_tac [] 1928 >> rw [ORD_CHR_lem] 1929 >> BasicProvers.EVERY_CASE_TAC 1930 >> fs [] 1931 >- metis_tac [NOT_SOME_NONE] 1932 >- (`n < LENGTH (table_to_vectors table)` by metis_tac[] 1933 >> fs [EL_MAP,sub_def] 1934 >> metis_tac [SOME_11, NOT_SOME_NONE]) 1935 >- (`n < LENGTH (table_to_vectors table)` by metis_tac[] 1936 >> fs [EL_MAP,sub_def] 1937 >> metis_tac [NOT_SOME_NONE]) 1938 >- (`n < LENGTH (table_to_vectors table)` by metis_tac[] 1939 >> fs [EL_MAP,sub_def] 1940 >> `x' = x''` by metis_tac [SOME_11, NOT_SOME_NONE] 1941 >> rw [] 1942 >> fs [fromList_Vector] 1943 >> FIRST_X_ASSUM match_mp_tac 1944 >> FIRST_X_ASSUM match_mp_tac 1945 >> rw [PULL_EXISTS] 1946 >> rw [alistTheory.alist_range_def, EXTENSION] 1947 >> metis_tac []) 1948QED 1949 1950 1951(*---------------------------------------------------------------------------*) 1952(* Main correctness theorem: strings accepted by DFA are just the strings *) 1953(* specified by regexp r *) 1954(*---------------------------------------------------------------------------*) 1955 1956Theorem Brzozowski_correct : 1957 !r s. 1958 dom_Brz empty (singleton (normalize r) ()) 1959 (1,singleton (normalize r) 0,[]) /\ 1960 (!c. MEM c s ==> MEM (ORD c) ALPHABET) 1961 ==> 1962 (Brz_lang r s = regexp_lang (normalize r) s) 1963Proof 1964 rw [GSYM regexp_lang_smart_deriv, Brz_lang_def] 1965 >> UNABBREV_ALL_TAC 1966 >> fs [compile_regexp_def] 1967 >> `?seen next_state state_map table. 1968 Brzozowski empty (singleton (normalize r) ()) 1969 (1,singleton (normalize r) 0,[]) = (seen,next_state,state_map,table)` 1970 by metis_tac [pair_CASES,SND] 1971 >> fs [LET_THM] 1972 >> rw [] 1973 >> `Brz_invariant empty (singleton (normalize r) ()) 1974 (1,singleton (normalize r) 0,[])` 1975 by (rw_tac set_ss [Brz_invariant_def,invar_def,dom_def, normalize_thm, 1976 fmap_inj_def, good_table_def, singleton_thm,empty_def, 1977 invariant_def,mem_regexp_def,member_iff_lookup] 1978 >- (fs[singleton_def,oin_def,member_def] 1979 >> BasicProvers.EVERY_CASE_TAC 1980 >- metis_tac[] 1981 >- metis_tac[eq_cmp_regexp_compare,eq_cmp_def,normalize_thm] 1982 >- metis_tac[]) 1983 >- fs [lookup_def] 1984 >- (fs [fdom_def, singleton_def,lookup_def] 1985 >> BasicProvers.EVERY_CASE_TAC 1986 >> rw [] 1987 >> metis_tac [eq_cmp_regexp_compare, eq_cmp_def,normalize_thm]) 1988 >- (rw [union_def,set_def,GSYM empty_def, GSYM oempty_def] >> 1989 rw [EXTENSION,fdom_def, osingleton_def, singleton_def, lookup_def] >> 1990 CASE_TAC) 1991 >- (rw [range_def,frange_def, EXTENSION, singleton_def,lookup_def,EQ_IMP_THM] 1992 >- (BasicProvers.EVERY_CASE_TAC >> rw []) 1993 >- (Q.EXISTS_TAC `normalize r` >> rw [regexp_compare_id] >> decide_tac)) 1994 >- (rw [GSYM empty_def, GSYM oempty_def] 1995 >> rw [fdom_def, lookup_def, empty_def, oempty_def]) 1996 >- (ntac 2 (pop_assum mp_tac) 1997 >> simp_tac set_ss [fdom_def,singleton_def,lookup_def] 1998 >> BasicProvers.EVERY_CASE_TAC 1999 >> rw[] 2000 >> metis_tac [eq_cmp_def, eq_cmp_regexp_compare]) 2001 >- fs [alistTheory.ALOOKUP_def] 2002 >- fs [alistTheory.ALOOKUP_def] 2003 >- fs [alistTheory.ALOOKUP_def]) 2004 >> imp_res_tac Brz_inv_thm 2005 >> imp_res_tac Brz_mono 2006 >> imp_res_tac Brz_invariant_final 2007 >> fs [Brz_invariant_def] 2008 >> `smart_deriv_matches (normalize r) s 2009 <=> 2010 table_lang (get_accepts state_map) table (apply state_map (normalize r)) (MAP ORD s)` 2011 by (fs [invar_def,set_def,ounion_oempty,union_def] 2012 >> `fdom num_cmp (oimage num_cmp (apply state_map) seen) 2013 = frange regexp_compare state_map` 2014 by (simp_tac set_ss [frange_def] >> 2015 qpat_x_assum `dom _ = dom __` mp_tac >> 2016 rw [fdom_oimage_inst,apply_def, oin_fdom,fapply_def, dom_def] >> 2017 fs[ounion_oempty, GSYM oempty_def] >> 2018 simp_tac set_ss [fdom_def] >> metis_tac[THE_DEF]) 2019 >> mp_tac (table_lang_correct 2020 |> INST_TYPE [alpha |-> ``:num``] 2021 |> Q.SPEC `get_accepts state_map` 2022 |> Q.SPEC `table:(num,(num,num)alist)alist` 2023 |> Q.SPEC `state_map:(regexp,num)balanced_map`) 2024 >> fs[] 2025 >> `(!n r. (lookup regexp_compare r state_map = SOME n) ==> 2026 (MEM n (get_accepts state_map) <=> nullable r))` 2027 by (rw [mem_get_accepts,EQ_IMP_THM] 2028 >- (`r' IN fdom regexp_compare state_map` by rw [fdom_def] 2029 >> metis_tac [fmap_inj_def, eq_cmp_regexp_compare, eq_cmp_def]) 2030 >- metis_tac[]) 2031 >> rw [] 2032 >> pop_assum mp_tac 2033 >> imp_res_tac string_to_numlist 2034 >> DISCH_THEN (assume_tac o Q.SPECL [`0n`, `normalize r`, `nl`]) 2035 >> rfs[] 2036 >> rw[] 2037 >> `normalize r IN fdom regexp_compare (singleton (normalize r) 0)` 2038 by (rw [fdom_def, singleton_def,lookup_def,EQ_IMP_THM] 2039 >> BasicProvers.EVERY_CASE_TAC 2040 >> fs [regexp_compare_id]) 2041 >> `normalize r IN fdom regexp_compare state_map /\ 2042 (lookup regexp_compare (normalize r) (singleton (normalize r) 0) = 2043 lookup regexp_compare (normalize r) state_map)` 2044 by metis_tac [submap_def] 2045 >> fs [singleton_def,lookup_def,regexp_compare_id] 2046 >> qpat_x_assum `table_lang (get_accepts state_map) table 0 nl <=> 2047 smart_deriv_matches (normalize r) (MAP CHR nl)` 2048 (SUBST_ALL_TAC o SYM) 2049 >> rw [MAP_MAP_o,combinTheory.o_DEF,apply_def, fapply_def] 2050 >> pop_assum (SUBST_ALL_TAC o SYM) 2051 >> rw [] 2052 >> `MAP (\x. ORD (CHR x)) nl = MAP (\x.x) nl` 2053 by (irule MAP_CONG >> rw [] >> metis_tac [mem_alphabet,ORD_CHR_lem]) 2054 >> rw[]) 2055 >> rw [] 2056 >> imp_res_tac string_to_numlist 2057 >> rw [MAP_MAP_o,combinTheory.o_DEF,apply_def, fapply_def] 2058 >> `MAP (\x. ORD (CHR x)) nl = MAP (\x.x) nl` 2059 by (irule MAP_CONG >> rw [] >> metis_tac [mem_alphabet,ORD_CHR_lem]) 2060 >> rw [] 2061 >> match_mp_tac (GSYM (SIMP_RULE (srw_ss()) [PULL_FORALL, AND_IMP_INTRO] vec_lang_correct)) 2062 >> rw [] 2063 >- rw [sorted_get_accepts] 2064 >- metis_tac [mem_alphabet] 2065 >- (fs [good_table_def, alistTheory.alist_range_def] 2066 >> res_tac 2067 >> `n'' IN range state_map` 2068 by (simp_tac set_ss [range_def, frange_def] >> metis_tac[]) 2069 >> `n'' IN count (LENGTH table)` by metis_tac [EXTENSION] 2070 >> metis_tac [IN_COUNT]) 2071 >- metis_tac [mem_alphabet] 2072 >- (`normalize r IN fdom regexp_compare (singleton (normalize r) 0)` 2073 by (rw [fdom_def, singleton_def,lookup_def,EQ_IMP_THM] 2074 >> BasicProvers.EVERY_CASE_TAC 2075 >> fs [regexp_compare_id]) 2076 >> `normalize r IN fdom regexp_compare state_map /\ 2077 (lookup regexp_compare (normalize r) state_map = 2078 lookup regexp_compare (normalize r) (singleton (normalize r) 0))` 2079 by metis_tac [submap_def] 2080 >> rw [singleton_def,lookup_def,regexp_compare_id] 2081 >> `0n IN range state_map` 2082 by (simp_tac set_ss [range_def, frange_def] 2083 >> Q.EXISTS_TAC `normalize r` 2084 >> rw [singleton_def,lookup_def,EQ_IMP_THM] 2085 >> CASE_TAC 2086 >> fs [regexp_compare_id]) 2087 >> `0n IN count (LENGTH table)` by metis_tac [EXTENSION] 2088 >> metis_tac [IN_COUNT]) 2089QED 2090 2091Theorem Brzozowski_correct_alt : 2092 !r s. 2093 dom_Brz empty (singleton (normalize r) ()) 2094 (1,singleton (normalize r) 0,[]) /\ 2095 (!c. MEM c s ==> MEM (ORD c) ALPHABET) 2096 ==> 2097 (regexp_matcher r s = regexp_lang r s) 2098Proof 2099 rw [] 2100 >> `Brz_lang r s = regexp_lang r s` 2101 by metis_tac [regexp_lang_normalize,Brzozowski_correct] 2102 >> pop_assum (SUBST_ALL_TAC o SYM) 2103 >> rw [Brz_lang_def, regexp_matcher_def, LET_THM,apply_def,fapply_def] 2104QED 2105 2106Theorem Brzozowski_partial_eval : 2107 !r state_numbering delta accepts deltaV acceptsV start_state. 2108 (compile_regexp r = (state_numbering,delta,accepts)) /\ 2109 (deltaV = fromList (MAP fromList delta)) /\ 2110 (acceptsV = fromList accepts) /\ 2111 (lookup regexp_compare (normalize r) state_numbering = SOME start_state) /\ 2112 dom_Brz empty (singleton (normalize r) ()) 2113 (1,singleton (normalize r) 0,[]) 2114 ==> 2115 !s. EVERY (\c. MEM (ORD c) ALPHABET) s 2116 ==> 2117 (exec_dfa acceptsV deltaV start_state s = regexp_lang r s) 2118Proof 2119 rw [EVERY_MEM] 2120 >> `regexp_matcher r s = regexp_lang r s` 2121 by metis_tac [Brzozowski_correct_alt] 2122 >> pop_assum (SUBST_ALL_TAC o SYM) 2123 >> rw [regexp_matcher_def, LET_THM] 2124QED 2125 2126 2127Theorem Brzozowski_partial_eval_conseq : 2128 !r state_numbering delta accepts deltaV acceptsV start_state. 2129 (compile_regexp r = (state_numbering,delta,accepts)) /\ 2130 (deltaV = fromList (MAP fromList delta)) /\ 2131 (acceptsV = fromList accepts) /\ 2132 (lookup regexp_compare (normalize r) state_numbering = SOME start_state) /\ 2133 dom_Brz_alt empty (singleton (normalize r) ()) 2134 ==> 2135 !s. EVERY (\c. ORD c < alphabet_size) s 2136 ==> 2137 (exec_dfa acceptsV deltaV start_state s = regexp_lang r s) 2138Proof 2139 metis_tac[Brzozowski_partial_eval,all_ord_string, 2140 dom_Brz_alt_equal,all_ord_string] 2141QED 2142 2143(*---------------------------------------------------------------------------*) 2144(* Eliminate check that all chars in string are in alphabet. This can be *) 2145(* be done when alphabet = [0..255] *) 2146(*---------------------------------------------------------------------------*) 2147 2148Theorem Brzozowski_partial_eval_256 = 2149 if Regexp_Type.alphabet_size = 256 then 2150 SIMP_RULE list_ss [ORD_BOUND,alphabet_size_def] Brzozowski_partial_eval_conseq 2151 else TRUTH; 2152 2153 2154Theorem regexp_matcher_correct: 2155 dom_Brz_alt empty (singleton (normalize r) ()) ==> 2156 (regexp_matcher r s <=> s IN regexp_lang r) 2157Proof 2158 rw[regexp_matcher_def] 2159 \\ pairarg_tac \\ fs[] 2160 \\ imp_res_tac compile_regexp_good_vec 2161 \\ rfs[dom_Brz_alt_equal,fdom_def] 2162 \\ imp_res_tac Brzozowski_partial_eval_256 2163 \\ simp[IN_DEF] 2164QED 2165 2166(* val _ = EmitTeX.tex_theory"-"; *) 2167 2168val _ = export_theory(); 2169