1open HolKernel Parse boolLib bossLib lcsymtacs numLib; 2open optionTheory pairTheory relationTheory arithmeticTheory 3 pred_setTheory bagTheory containerTheory 4 listTheory rich_listTheory stringTheory sortingTheory mergesortTheory 5 comparisonTheory balanced_mapTheory eq_cmp_bmapTheory osetTheory 6 finite_mapTheory vec_mapTheory charsetTheory regexpTheory 7; 8 9val _ = numLib.prefer_num(); 10 11fun pat_elim q = Q.PAT_X_ASSUM q (K ALL_TAC); 12 13val SET_EQ_THM = Q.prove 14(`!s1 s2. (s1 = s2) = !x. s1 x = s2 x`, 15 METIS_TAC [EXTENSION,IN_DEF]); 16 17val LENGTH_NIL_SYM = 18 GEN_ALL (CONV_RULE (LHS_CONV SYM_CONV) (SPEC_ALL LENGTH_NIL)); 19 20val list_ss = list_ss ++ rewrites [LENGTH_NIL, LENGTH_NIL_SYM]; 21 22val set_ss = list_ss ++ pred_setLib.PRED_SET_ss ++ rewrites [SET_EQ_THM,IN_DEF] 23 24val _ = new_theory "regexp_compiler"; 25 26val comparison_distinct = TypeBase.distinct_of ``:ordering`` 27 28val eq_cmp_regexp_compare = Q.prove 29(`eq_cmp regexp_compare`, 30 metis_tac [eq_cmp_def, regexp_compare_good,regexp_compare_eq]); 31 32(*---------------------------------------------------------------------------*) 33(* Output of the compiler is in term of vectors. *) 34(*---------------------------------------------------------------------------*) 35 36val _ = Hol_datatype `vector = Vector of 'a list`; 37 38val fromList_def = Define `fromList l = Vector l`; 39val sub_def = Define `sub (Vector l) n = EL n l`; 40val length_def = Define `length (Vector l) = LENGTH l`; 41 42val fromList_Vector = save_thm 43("fromList_Vector", 44 SIMP_RULE list_ss [GSYM FUN_EQ_THM] fromList_def); 45 46(*---------------------------------------------------------------------------*) 47(* Manipulate the set of seen states *) 48(*---------------------------------------------------------------------------*) 49 50val _ = Parse.type_abbrev("regexp_set", ``:(regexp,unit)balanced_map``); 51 52val insert_regexp_def = 53 Define 54 `insert_regexp r seen = balanced_map$insert regexp_compare r () seen`; 55 56val mem_regexp_def = 57 Define 58 `mem_regexp r seen = balanced_map$member regexp_compare r seen`; 59 60(*---------------------------------------------------------------------------*) 61(* Transitions out of a state (regexp). *) 62(*---------------------------------------------------------------------------*) 63 64val transitions_def = 65 Define 66 `transitions r = MAP (\c. (c,smart_deriv c r)) ALPHABET`; 67 68val extend_states_def = 69 Define 70 `(extend_states next_state state_map trans [] = (next_state,state_map,trans)) /\ 71 (extend_states next_state state_map trans ((c,r')::t) = 72 case balanced_map$lookup regexp_compare r' state_map 73 of SOME n => extend_states next_state state_map ((c,n)::trans) t 74 | NONE => extend_states (next_state + 1n) 75 (balanced_map$insert regexp_compare r' next_state state_map) 76 ((c,next_state)::trans) 77 t)`; 78 79val extend_states_ind = fetch "-" "extend_states_ind"; 80 81val build_table_def = 82 Define 83 `build_table arcs r (next_state,state_map,table) = 84 let (next_state,state_map,trans) = extend_states next_state state_map [] arcs 85 in case balanced_map$lookup regexp_compare r state_map 86 of SOME n => (next_state, state_map, (n,trans)::table) 87 | NONE => (next_state + 1n, 88 balanced_map$insert regexp_compare r next_state state_map, 89 (next_state,trans)::table)`; 90 91(*---------------------------------------------------------------------------*) 92(* The core regexp compiler. The seen argument holds the already-seen *) 93(* regexps, which map to states in the final DFA. The n argument is a *) 94(* step-index used to ensure that the function terminates. *) 95(*---------------------------------------------------------------------------*) 96 97val Brz_def = 98 Define 99 `Brz seen worklist acc n = 100 if n <= 0n then NONE else 101 case worklist of 102 [] => SOME (seen,acc) 103 | r::t => 104 if mem_regexp r seen then Brz seen t acc (n-1) else 105 let arcs = transitions r 106 in Brz (insert_regexp r seen) 107 (remove_dups (MAP SND arcs) ++ t) 108 (build_table arcs r acc) 109 (n-1)`; 110 111val MAXNUM_32_def = Define `MAXNUM_32:num = 2147483647`; 112 113(*---------------------------------------------------------------------------*) 114(* Build Greve-style Brz function *) 115(*---------------------------------------------------------------------------*) 116 117val MAX_LE_THM = Q.prove 118(`!m n. m <= MAX m n /\ n <= MAX m n`, 119 RW_TAC arith_ss [MAX_DEF]); 120 121val IS_SOME_EXISTS = Q.prove 122(`!x. IS_SOME x = ?y. x = SOME y`, 123 Cases THEN METIS_TAC [optionTheory.IS_SOME_DEF]); 124 125(*---------------------------------------------------------------------------*) 126(* Domain of the function. *) 127(*---------------------------------------------------------------------------*) 128 129val dom_Brz_def = 130 Define 131 `dom_Brz seen worklist acc = ?d. IS_SOME(Brz seen worklist acc d)`; 132 133(*---------------------------------------------------------------------------*) 134(* Create measure function rdepth. Uses following code copied from *) 135(* *) 136(* HOLDIR/src/pfl/defchoose *) 137(* *) 138(*---------------------------------------------------------------------------*) 139 140(*---------------------------------------------------------------------------*) 141(* MINCHOOSE (sname, cname,``!x1 ... xn. ?z. M[x1,...,xn,z]``) *) 142(* returns *) 143(* *) 144(* |- !x1...xn z. M[x1,...,xn,z] ==> *) 145(* M[x1,...,xn,cname x1...xn] /\ *) 146(* !m. M[x1,...,xn,m] ==> cname x1...xn <= m *) 147(* *) 148(* where cname in the theorem is a constant. This theorem is stored in the *) 149(* current theory under sname. Thus this rule is a form of the *) 150(* well-ordering property. *) 151(*---------------------------------------------------------------------------*) 152 153val WOP_THM = Q.prove 154(`!P. (?n. P n) ==> ?min. P min /\ !k. P k ==> min <= k`, 155 METIS_TAC [arithmeticTheory.WOP,DECIDE ``~(m<n) ==> n<=m``]); 156 157fun MINCHOOSE (store_name,const_name,tm) = 158 let val (V,body) = strip_forall tm 159 val P = snd(dest_comb body) 160 val wop_thm = BETA_RULE(SPEC P WOP_THM) 161 val min_term = snd(dest_imp (concl wop_thm)) 162 val min_term_pred = snd(dest_comb min_term) 163 val th1 = BETA_RULE(GSYM (ISPECL [body,min_term_pred] RIGHT_EXISTS_IMP_THM)) 164 val th2 = EQ_MP th1 wop_thm 165 val th3 = GENL V th2 166 val th4 = Ho_Rewrite.REWRITE_RULE[SKOLEM_THM] th3 167 val th5 = Ho_Rewrite.REWRITE_RULE[GSYM LEFT_FORALL_IMP_THM] th4 168 in 169 new_specification(store_name, [const_name], th5) 170 end 171 handle e => raise wrap_exn "" "MINCHOOSE" e; 172 173val rdepth_thm = 174 MINCHOOSE ("rdepth_thm", "rdepth", 175 ``!seen worklist acc. ?d. IS_SOME(Brz seen worklist acc d)``) 176 177(*---------------------------------------------------------------------------*) 178(* Define Brzozo *) 179(*---------------------------------------------------------------------------*) 180 181val Brzozo_def = 182 Define 183 `Brzozo seen worklist acc = 184 THE (Brz seen worklist acc (rdepth seen worklist acc))`; 185 186 187(*---------------------------------------------------------------------------*) 188(* Lemmas about Brz and definedness *) 189(*---------------------------------------------------------------------------*) 190 191val IS_SOME_Brz = Q.prove 192(`!d seen worklist acc. 193 IS_SOME (Brz seen worklist acc d) 194 ==> 195 d <> 0`, 196 Cases >> rw[Once Brz_def]) 197 198val Brz_SOME = Q.prove 199(`!d seen worklist acc res. 200 (Brz seen worklist acc d = SOME res) 201 ==> d <> 0`, 202 METIS_TAC [IS_SOME_Brz,IS_SOME_EXISTS]); 203 204val Brz_dlem = Q.prove 205(`!d seen worklist acc. 206 IS_SOME (Brz seen worklist acc d) 207 ==> 208 (Brz seen worklist acc d = Brz seen worklist acc (SUC d))`, 209 Ho_Rewrite.REWRITE_TAC [IS_SOME_EXISTS,GSYM LEFT_FORALL_IMP_THM] 210 >> Induct 211 >- metis_tac [Brz_SOME] 212 >- (rw[] >> rw[Once Brz_def,LET_THM] 213 >> pop_assum mp_tac 214 >> BasicProvers.EVERY_CASE_TAC 215 >- rw [Brz_def] 216 >- (rw [Once Brz_def] >> metis_tac []) 217 >- (DISCH_THEN (mp_tac o SIMP_RULE list_ss [Once Brz_def]) 218 >> rw[LET_THM] 219 >> metis_tac[])) 220); 221 222val Brz_monotone = Q.prove 223(`!d1 d2 seen worklist acc. 224 IS_SOME(Brz seen worklist acc d1) /\ d1 <= d2 225 ==> (Brz seen worklist acc d1 = Brz seen worklist acc d2)`, 226 RW_TAC arith_ss [LESS_EQ_EXISTS] THEN 227 Induct_on `p` THEN METIS_TAC [ADD_CLAUSES,Brz_dlem]); 228 229val Brz_norm = Q.prove 230(`!d seen worklist acc. 231 IS_SOME(Brz seen worklist acc d) 232 ==> 233 (Brz seen worklist acc d = Brz seen worklist acc (rdepth seen worklist acc))`, 234 METIS_TAC [Brz_monotone,rdepth_thm]); 235 236val Brz_determ = Q.store_thm("Brz_determ", 237 `!d1 d2 seen worklist acc. 238 IS_SOME(Brz seen worklist acc d1) /\ IS_SOME(Brz seen worklist acc d2) 239 ==> (Brz seen worklist acc d1 = Brz seen worklist acc d2)`, 240 METIS_TAC [Brz_norm]); 241 242(*---------------------------------------------------------------------------*) 243(* Derive eqns for dom_Brz *) 244(*---------------------------------------------------------------------------*) 245 246val lem = Q.prove 247(`!seen worklist acc. 248 (worklist = []) ==> IS_SOME (Brz seen worklist acc 1)`, 249 RW_TAC arith_ss [Once Brz_def]); 250 251val dom_base_case = Q.prove 252(`!seen worklist acc. 253 (worklist = []) ==> dom_Brz seen worklist acc`, 254 METIS_TAC [dom_Brz_def, lem]); 255 256val step2_lem1a = Q.prove 257(`!seen worklist acc r t. 258 (worklist = r::t) /\ mem_regexp r seen /\ dom_Brz seen worklist acc 259 ==> dom_Brz seen t acc`, 260 RW_TAC std_ss [dom_Brz_def] THEN 261 `d<>0` by METIS_TAC [IS_SOME_Brz] THEN 262 Q.EXISTS_TAC `d-1` THEN 263 Q.PAT_X_ASSUM `IS_SOME arg` (MP_TAC o ONCE_REWRITE_RULE [Brz_def]) THEN 264 CASE_TAC THEN RW_TAC arith_ss [LET_THM] 265 THEN METIS_TAC[]); 266 267val step2_lem1b = Q.prove 268(`!seen worklist acc r t arcs. 269 (worklist = r::t) /\ ~mem_regexp r seen /\ 270 (arcs = transitions r) /\ dom_Brz seen worklist acc 271 ==> dom_Brz (insert_regexp r seen) 272 (remove_dups (MAP SND arcs) ++ t) 273 (build_table arcs r acc)`, 274 RW_TAC std_ss [dom_Brz_def] THEN 275 `d<>0` by METIS_TAC [IS_SOME_Brz] THEN 276 Q.EXISTS_TAC `d-1` THEN 277 Q.PAT_X_ASSUM `IS_SOME arg` (MP_TAC o ONCE_REWRITE_RULE [Brz_def]) THEN 278 CASE_TAC THEN RW_TAC arith_ss [LET_THM] 279 THEN METIS_TAC[]); 280 281val step2_lem2a = Q.prove 282(`!seen worklist acc r t. 283 (worklist = r::t) /\ mem_regexp r seen /\ dom_Brz seen t acc 284 ==> dom_Brz seen worklist acc`, 285 rw_tac std_ss [dom_Brz_def] 286 >> Q.EXISTS_TAC `SUC d` 287 >> rw [Once Brz_def]); 288 289val step2_lem2b = Q.prove 290(`!seen worklist acc r t arcs. 291 (worklist = r::t) /\ (arcs = transitions r) /\ 292 ~mem_regexp r seen /\ 293 dom_Brz (insert_regexp r seen) 294 (remove_dups (MAP SND arcs) ++ t) 295 (build_table arcs r acc) 296 ==> dom_Brz seen worklist acc`, 297 rw_tac std_ss [dom_Brz_def] 298 >> Q.EXISTS_TAC `SUC d` 299 >> rw [Once Brz_def,LET_THM]); 300 301(*---------------------------------------------------------------------------*) 302(* Equational characterization of dom. *) 303(*---------------------------------------------------------------------------*) 304 305val dom_Brz_eqns = Q.store_thm 306("dom_Brz_eqns", 307 `(dom_Brz seen [] acc = T) /\ 308 (dom_Brz seen (r::t) acc = 309 if mem_regexp r seen 310 then dom_Brz seen t acc 311 else let arcs = transitions r 312 in dom_Brz (insert_regexp r seen) 313 (remove_dups (MAP SND arcs) ++ t) 314 (build_table arcs r acc))`, 315 CONJ_TAC 316 >- metis_tac [dom_base_case] 317 >- metis_tac [step2_lem1a,step2_lem1b,step2_lem2a,step2_lem2b,LET_THM]); 318 319(*---------------------------------------------------------------------------*) 320(* Optimization: dom_Brz doesn't depend on its "acc" argument, so can be *) 321(* replaced by a version that doesn't have it. *) 322(*---------------------------------------------------------------------------*) 323 324val dom_Brz_alt_def = 325 Define 326 `dom_Brz_alt seen worklist = dom_Brz seen worklist ARB`; 327 328val acc_irrelevant = Q.prove 329(`!n seen worklist acc acc1. 330 IS_SOME (Brz seen worklist acc n) ==> 331 IS_SOME (Brz seen worklist acc1 n)`, 332 Induct 333 >- rw [Once Brz_def] 334 >- (ONCE_REWRITE_TAC [Brz_def] 335 >> rw_tac list_ss [LET_THM] 336 >> BasicProvers.EVERY_CASE_TAC 337 >- rw [] 338 >- metis_tac[] 339 >- metis_tac[])); 340 341val dom_Brz_alt_equal = Q.store_thm 342("dom_Brz_alt_equal", 343 `!seen worklist acc. dom_Brz seen worklist acc = dom_Brz_alt seen worklist`, 344 rw [dom_Brz_def, dom_Brz_alt_def] >> metis_tac [acc_irrelevant]); 345 346 347(*---------------------------------------------------------------------------*) 348(* |- dom_Brz_alt seen [] /\ *) 349(* (dom_Brz_alt seen (r::t) = *) 350(* if mem_regexp r seen then *) 351(* dom_Brz_alt seen t *) 352(* else *) 353(* (let arcs = transitions r *) 354(* in dom_Brz_alt (insert_regexp r seen) *) 355(* (remove_dups (MAP SND arcs) ++ t))) *) 356(*---------------------------------------------------------------------------*) 357 358val dom_Brz_alt_eqns = save_thm 359("dom_Brz_alt_eqns", 360 SIMP_RULE bool_ss [dom_Brz_alt_equal] dom_Brz_eqns); 361 362(*---------------------------------------------------------------------------*) 363(* Recursion equations for Brzozo *) 364(*---------------------------------------------------------------------------*) 365 366val Brzozo_base = Q.prove 367(`!seen worklist acc. 368 dom_Brz seen worklist acc /\ (worklist = []) 369 ==> 370 (Brzozo seen worklist acc = (seen,acc))`, 371 RW_TAC std_ss [Brzozo_def,dom_Brz_def] 372 >> `rdepth seen [] acc <> 0` 373 by METIS_TAC [IS_SOME_Brz,rdepth_thm] 374 >> RW_TAC arith_ss [Once Brz_def]); 375 376val Brzozo_rec1 = Q.prove 377(`!seen worklist accr r t. 378 (worklist = r::t) /\ 379 dom_Brz seen worklist acc /\ 380 mem_regexp r seen 381 ==> 382 (Brzozo seen worklist acc = Brzozo seen t acc)`, 383 RW_TAC std_ss [Brzozo_def,dom_Brz_def] 384 >> `d <> 0` by METIS_TAC [IS_SOME_Brz] 385 >> `Brz seen (r::t) acc d = Brz seen t acc (d-1)` 386 by rw_tac list_ss [Once Brz_def] 387 >> METIS_TAC [IS_SOME_EXISTS,NOT_SOME_NONE,THE_DEF,Brz_norm]); 388 389val Brzozo_rec2 = Q.prove 390(`!seen worklist accr r t. 391 (worklist = r::t) /\ 392 dom_Brz seen worklist acc /\ 393 ~mem_regexp r seen 394 ==> 395 (Brzozo seen worklist acc = 396 let arcs = transitions r 397 in Brzozo (insert_regexp r seen) 398 (remove_dups (MAP SND arcs) ++ t) 399 (build_table arcs r acc))`, 400 rw_tac std_ss [Brzozo_def,dom_Brz_def,LET_THM] 401 >> `d <> 0` by METIS_TAC [IS_SOME_Brz] 402 >> `Brz seen (r::t) acc d = 403 let arcs = transitions r 404 in Brz (insert_regexp r seen) 405 (remove_dups (MAP SND arcs) ++ t) 406 (build_table arcs r acc) (d-1)` 407 by rw_tac list_ss [Once Brz_def,LET_THM] 408 >> METIS_TAC [IS_SOME_EXISTS,NOT_SOME_NONE,THE_DEF,Brz_norm,LET_THM]); 409 410(*---------------------------------------------------------------------------*) 411(* Equational characterization of Brzozo. *) 412(*---------------------------------------------------------------------------*) 413 414val Brzozo_eqns = Q.prove 415(`!seen worklist acc. 416 dom_Brz seen worklist acc 417 ==> 418 (Brzozo seen worklist acc = 419 case worklist of 420 [] => (seen,acc) 421 | r::t => 422 if mem_regexp r seen then 423 Brzozo seen t acc 424 else 425 let arcs = transitions r 426 in 427 Brzozo (insert_regexp r seen) 428 (remove_dups (MAP SND arcs) ++ t) 429 (build_table arcs r acc))`, 430 Cases_on `worklist` >> rpt gen_tac >> CASE_TAC >> rw[LET_THM] 431 >- metis_tac [Brzozo_base] 432 >- metis_tac [Brzozo_rec1] 433 >- metis_tac [Brzozo_rec2] 434); 435 436(*---------------------------------------------------------------------------*) 437(* Now prove induction theorem. This is based on using rdepth as a measure *) 438(* on the recursion. Thus we first have some lemmas about how rdepth *) 439(* decreases in recursive calls. *) 440(*---------------------------------------------------------------------------*) 441 442val step3a_lt = Q.prove 443(`!seen worklist acc r t. 444 (worklist = r::t) /\ mem_regexp r seen /\ 445 dom_Brz seen worklist acc 446 ==> rdepth seen t acc < rdepth seen worklist acc`, 447 rw_tac std_ss [dom_Brz_def] THEN IMP_RES_TAC rdepth_thm THEN 448 `rdepth seen (r::t) acc <> 0` by METIS_TAC [IS_SOME_Brz] THEN 449 `rdepth seen (r::t) acc - 1 < rdepth seen (r::t) acc` by DECIDE_TAC THEN 450 `IS_SOME (Brz seen t acc (rdepth seen (r::t) acc - 1))` 451 by (Q.PAT_X_ASSUM `IS_SOME (Brz seen (r::t) acc (rdepth seen (r::t) acc))` 452 (mp_tac o SIMP_RULE arith_ss [Once Brz_def]) 453 >> CASE_TAC >> rw[LET_THM] 454 >- metis_tac[] 455 >- metis_tac[]) 456 >> `rdepth seen t acc <= rdepth seen (r::t) acc - 1` 457 by metis_tac [rdepth_thm] 458 >> decide_tac); 459 460val step3b_lt = Q.prove 461(`!seen worklist acc r t arcs. 462 (worklist = r::t) /\ ~mem_regexp r seen /\ 463 (arcs = transitions r) /\ dom_Brz seen worklist acc 464 ==> rdepth (insert_regexp r seen) 465 (remove_dups (MAP SND arcs) ++ t) 466 (build_table arcs r acc) < rdepth seen worklist acc`, 467 rw_tac std_ss [dom_Brz_def] THEN IMP_RES_TAC rdepth_thm 468 >> `rdepth seen (r::t) acc <> 0` by METIS_TAC [IS_SOME_Brz] 469 >> `rdepth seen (r::t) acc - 1 < rdepth seen (r::t) acc` by DECIDE_TAC 470 >> `IS_SOME (Brz (insert_regexp r seen) 471 (remove_dups (MAP SND (transitions r)) ++ t) 472 (build_table (transitions r) r acc) 473 (rdepth seen (r::t) acc - 1))` 474 by (Q.PAT_X_ASSUM `IS_SOME (Brz seen (r::t) acc (rdepth seen (r::t) acc))` 475 (mp_tac o SIMP_RULE arith_ss [Once Brz_def]) 476 >> CASE_TAC >> rw[LET_THM] 477 >- metis_tac[] 478 >- metis_tac[]) 479 >> `rdepth (insert_regexp r seen) 480 (remove_dups (MAP SND (transitions r)) ++ t) 481 (build_table (transitions r) r acc) <= rdepth seen (r::t) acc - 1` 482 by metis_tac [rdepth_thm] 483 >> decide_tac); 484 485(*---------------------------------------------------------------------------*) 486(* Induction for Brzozo is obtained by instantiating the WF induction *) 487(* theorem with the rdepth measure and then simplifying. *) 488(*---------------------------------------------------------------------------*) 489 490val ind0 = MATCH_MP relationTheory.WF_INDUCTION_THM 491 (Q.ISPEC `\(seen,worklist,acc). rdepth seen worklist acc` 492 prim_recTheory.WF_measure); 493val ind1 = SIMP_RULE std_ss [prim_recTheory.measure_thm] ind0; 494val ind2 = SIMP_RULE std_ss [pairTheory.FORALL_PROD] 495 (Q.ISPEC `\(seen,worklist,acc). 496 dom_Brz seen worklist acc ==> P seen worklist acc` ind1); 497val ind3 = Q.prove 498(`!P. (!seen worklist acc. 499 (!a b c. rdepth a b c < rdepth seen worklist acc 500 ==> dom_Brz a b c 501 ==> P a b c) 502 ==> dom_Brz seen worklist acc ==> P seen worklist acc) 503 ==> 504 !seen worklist acc. dom_Brz seen worklist acc ==> P seen worklist acc`, 505rpt strip_tac 506 >> pop_assum mp_tac 507 >> Cases_on `acc` 508 >> Cases_on `r` 509 >> Q.SPEC_TAC (`r'`,`w`) 510 >> Q.SPEC_TAC (`q'`,`v`) 511 >> Q.SPEC_TAC (`q`,`u`) 512 >> Q.ID_SPEC_TAC `worklist` 513 >> Q.ID_SPEC_TAC `seen` 514 >> match_mp_tac ind2 515 >> fs [FORALL_PROD]); 516 517val Brzozowski_ind = Q.store_thm 518("Brzozowski_ind", 519 `!P. 520 (!seen worklist acc. 521 dom_Brz seen worklist acc /\ 522 (!r t. (worklist = r::t) /\ mem_regexp r seen ==> P seen t acc) /\ 523 (!r t arcs. (arcs = transitions r) /\ (worklist = r::t) /\ ~mem_regexp r seen 524 ==> P (insert_regexp r seen) 525 (remove_dups (MAP SND arcs) ++ t) 526 (build_table arcs r acc)) 527 ==> P seen worklist acc) 528 ==> !seen worklist acc. dom_Brz seen worklist acc ==> P seen worklist acc`, 529 gen_tac >> strip_tac 530 >> HO_MATCH_MP_TAC ind3 531 >> rw[] 532 >> metis_tac [step3a_lt,step3b_lt,dom_Brz_eqns]); 533 534(*---------------------------------------------------------------------------*) 535(* Efficient executable version of Brzozo *) 536(*---------------------------------------------------------------------------*) 537 538val exec_Brz_def = 539 Define 540 `exec_Brz seen worklist acc d = 541 if d=0n then 542 (if dom_Brz seen worklist acc then Brzozo seen worklist acc else ARB) 543 else 544 case worklist 545 of [] => (seen,acc) 546 | r::t => 547 if mem_regexp r seen then 548 exec_Brz seen t acc (d ��� 1) 549 else 550 let arcs = transitions r 551 in exec_Brz (insert_regexp r seen) 552 (remove_dups (MAP SND arcs) ++ t) 553 (build_table arcs r acc) (d ��� 1)`; 554 555val exec_Brz_equals_Brzozo = Q.prove 556(`!d seen worklist acc. 557 dom_Brz seen worklist acc 558 ==> 559 (exec_Brz seen worklist acc d = Brzozo seen worklist acc)`, 560 Induct 561 >> rw_tac std_ss [Once exec_Brz_def,LET_THM] 562 >> pop_assum mp_tac 563 >> CASE_TAC 564 >- rw [Brzozo_eqns,dom_Brz_eqns] 565 >- rw [Brzozo_eqns,dom_Brz_eqns,LET_THM]); 566 567val Brzozowski_def = 568 Define 569 `Brzozowski seen worklist acc = 570 if dom_Brz seen worklist acc then 571 Brzozo seen worklist acc 572 else 573 exec_Brz seen worklist acc MAXNUM_32`; 574 575(*---------------------------------------------------------------------------*) 576(* Theorem showing that Brzozowski can be executed w.o. termination proof. *) 577(* In order to evaluate Brzozowski, we can dispatch to exec_Brz. *) 578(*---------------------------------------------------------------------------*) 579 580val Brzozowski_exec_Brz = Q.store_thm 581("Brzozowski_exec_Brz", 582 `Brzozowski seen worklist acc = exec_Brz seen worklist acc MAXNUM_32`, 583 rw_tac std_ss [Brzozowski_def,exec_Brz_equals_Brzozo]); 584 585(*---------------------------------------------------------------------------*) 586(* In order to reason about Brzozowski, we use the following theorem. *) 587(*---------------------------------------------------------------------------*) 588 589val Brzozowski_eqns = Q.store_thm 590("Brzozowski_eqns", 591 `dom_Brz seen worklist acc ==> 592 (Brzozowski seen worklist acc = 593 case worklist 594 of [] => (seen,acc) 595 | r::t => 596 if mem_regexp r seen then 597 Brzozowski seen t acc 598 else 599 let arcs = transitions r 600 in Brzozowski (insert_regexp r seen) 601 (remove_dups (MAP SND arcs) ++ t) 602 (build_table arcs r acc))`, 603 rw_tac std_ss [Brzozowski_def] 604 >> CASE_TAC 605 >- rw [Brzozo_eqns] 606 >- (rw [Brzozo_eqns,LET_THM] 607 >> metis_tac [exec_Brz_equals_Brzozo,dom_Brz_eqns])); 608 609 610(*---------------------------------------------------------------------------*) 611(* Return to definition of compiler *) 612(*---------------------------------------------------------------------------*) 613 614val get_accepts_def = 615 Define 616 `get_accepts fmap = 617 mergesort (\a b:num. a <= b) 618 (balanced_map$foldrWithKey 619 (\r (n:num) nlist. if nullable r then n::nlist else nlist) 620 [] fmap)`; 621 622val accepts_to_vector_def = 623 Define 624 `accepts_to_vector accept_states max_state = 625 alist_to_vec (MAP (\x. (x,T)) accept_states) F max_state max_state`; 626 627val table_to_vectors_def = 628 Define 629 `table_to_vectors table = 630 MAP (\(k:num,table2). 631 alist_to_vec (mergesort (inv_image (\a b:num. a <= b) FST) 632 (MAP (\(c,n). (c, SOME n)) table2)) 633 NONE alphabet_size alphabet_size) 634 (mergesort (inv_image (\a b:num. a <= b) FST) table)`; 635 636val compile_regexp_def = 637 Define 638 `compile_regexp r = 639 let r' = normalize r in 640 let (states,last_state,state_numbering,table) 641 = Brzozowski balanced_map$empty [r'] 642 (1n, balanced_map$singleton r' 0n, []) in 643 let delta_vecs = table_to_vectors table in 644 let accepts_vec = accepts_to_vector (get_accepts state_numbering) last_state 645 in 646 (state_numbering, 647 delta_vecs, 648 accepts_vec)`; 649 650val exec_dfa_def = 651 Define 652 `exec_dfa finals table n s = 653 case s of 654 | "" => sub finals n 655 | c::t => 656 case sub (sub table n) (ORD c) of 657 | NONE => F 658 | SOME k => exec_dfa finals table k t`; 659 660val exec_dfa_thm = Q.prove 661(`(exec_dfa finals table n [] = sub finals n) /\ 662 (exec_dfa finals table n (c::t) = 663 case sub (sub table n) (ORD c) of 664 | NONE => F 665 | SOME k => exec_dfa finals table k t)`, 666CONJ_TAC 667 >- rw_tac list_ss [exec_dfa_def] 668 >- rw_tac list_ss [SimpLHS, Once exec_dfa_def]) 669 670(* 671val exec_dfa_def = 672 Define 673 `(exec_dfa finals table n [] = EL n finals) /\ 674 (exec_dfa finals table n (c::t) = 675 case EL (ORD c) (EL n table) of 676 | NONE => F 677 | SOME k => exec_dfa finals table k t)`; 678*) 679 680 681(*---------------------------------------------------------------------------*) 682(* Returns a function of type :string -> bool *) 683(*---------------------------------------------------------------------------*) 684 685val regexp_matcher_def = 686 Define 687 `regexp_matcher r = 688 let (state_numbering,delta,accepts) = compile_regexp r in 689 let start_state = THE (balanced_map$lookup regexp_compare 690 (normalize r) state_numbering) in 691 let acceptsV = fromList accepts in 692 let deltaV = fromList (MAP fromList delta) 693 in 694 exec_dfa acceptsV deltaV start_state`; 695 696(* 697val regexp_matcher_def = 698 Define 699 `regexp_matcher r = 700 let (state_numbering,delta,accepts) = compile_regexp r in 701 let start_state_opt = balanced_map$lookup regexp_compare (normalize r) state_numbering 702 in 703 exec_dfa accepts delta (THE start_state_opt)`; 704*) 705 706(*---------------------------------------------------------------------------*) 707(* get_accepts properties *) 708(*---------------------------------------------------------------------------*) 709 710val sorted_get_accepts = Q.store_thm 711("sorted_get_accepts", 712 `!fmap. SORTED $<= (get_accepts fmap)`, 713 RW_TAC set_ss [get_accepts_def] THEN 714 CONV_TAC (DEPTH_CONV ETA_CONV) THEN 715 MATCH_MP_TAC mergesort_sorted THEN 716 RW_TAC arith_ss [transitive_def, total_def]); 717 718val lookup_bin = Q.prove 719(`!fmap fmap' n k v x. 720 (lookup cmp r (Bin n k v fmap fmap') = SOME x) 721 <=> 722 ((cmp r k = Equal) /\ (v = x)) \/ 723 ((cmp r k = Less) /\ (lookup cmp r fmap = SOME x)) \/ 724 ((cmp r k = Greater) /\ (lookup cmp r fmap' = SOME x))`, 725 RW_TAC list_ss [lookup_def] THEN BasicProvers.CASE_TAC); 726 727val member_iff_lookup = Q.prove 728(`!fmap cmp x. member cmp x fmap <=> ?y. lookup cmp x fmap = SOME y`, 729 Induct 730 >> rw_tac set_ss [member_def, lookup_def] 731 >> BasicProvers.EVERY_CASE_TAC); 732 733val member_bin = Q.prove 734(`!fmap fmap' n k v. 735 member cmp r (Bin n k v fmap fmap') 736 <=> 737 ((cmp r k = Equal) \/ 738 ((cmp r k = Less) /\ member cmp r fmap) \/ 739 ((cmp r k = Greater) /\ member cmp r fmap'))`, 740 RW_TAC list_ss [member_def] THEN BasicProvers.CASE_TAC); 741 742val key_less_lookup = Q.prove 743(`!fmap cmp k1 k2 x. 744 invariant cmp fmap /\ good_cmp cmp /\ 745 key_ordered cmp k1 fmap Less /\ 746 (lookup cmp k2 fmap = SOME x) 747 ==> 748 (cmp k1 k2 = Less)`, 749Induct >> rw_tac list_ss [key_ordered_def, lookup_def, invariant_def] 750 >> every_case_tac 751 >- metis_tac [] 752 >- metis_tac [good_cmp_def,comparison_distinct] 753 >- metis_tac []); 754 755val key_greater_lookup = Q.prove 756(`!fmap cmp k1 k2 x. 757 invariant cmp fmap /\ good_cmp cmp /\ 758 key_ordered cmp k1 fmap Greater /\ 759 (lookup cmp k2 fmap = SOME x) 760 ==> 761 (cmp k2 k1 = Less)`, 762Induct >> rw_tac list_ss [key_ordered_def, lookup_def, invariant_def] 763 >> every_case_tac 764 >- metis_tac [] 765 >- metis_tac [good_cmp_def,comparison_distinct] 766 >- metis_tac []); 767 768val lemma = Q.prove 769(`!fmap x acc. 770 balanced_map$invariant regexp_compare fmap /\ 771 MEM x (foldrWithKey 772 (��r n nlist. if nullable r then n::nlist else nlist) 773 acc fmap) 774 ==> 775 MEM x acc \/ ?r. nullable r /\ (lookup regexp_compare r fmap = SOME x)`, 776 Induct THEN RW_TAC std_ss [] 777 >- full_simp_tac list_ss [foldrWithKey_def,lookup_def] 778 >- (simp_tac list_ss [lookup_bin] 779 >> 780 `invariant regexp_compare fmap /\ 781 invariant regexp_compare fmap' /\ 782 key_ordered regexp_compare k fmap Greater /\ 783 key_ordered regexp_compare k fmap' Less` 784 by metis_tac [invariant_def] 785 >> qpat_x_assum `invariant cmp (Bin n k v fmap fmap')` (K ALL_TAC) 786 >> fs [] 787 >> RULE_ASSUM_TAC (REWRITE_RULE [foldrWithKey_def]) 788 >> RES_THEN MP_TAC >> BETA_TAC >> rw[] 789 >- metis_tac [regexp_compare_eq] 790 >- metis_tac [key_less_lookup,regexp_compare_good,good_cmp_thm] 791 >- metis_tac [key_greater_lookup,regexp_compare_good,good_cmp_thm] 792 >- metis_tac [key_less_lookup,regexp_compare_good,good_cmp_thm] 793 >- metis_tac [key_greater_lookup,regexp_compare_good,good_cmp_thm]));; 794 795val mem_acc = Q.prove 796(`!P fmap x acc. 797 MEM x acc 798 ==> 799 MEM x (foldrWithKey 800 (��r n nlist. if P r then n::nlist else nlist) 801 acc fmap)`, 802 gen_tac >> Induct >> RW_TAC std_ss [foldrWithKey_def] >> metis_tac [MEM]); 803 804val lemma1 = Q.prove 805(`!P fmap r x acc. 806 P r /\ (lookup regexp_compare r fmap = SOME x) 807 ==> 808 MEM x (foldrWithKey (��r n nlist. if P r then n::nlist else nlist) acc fmap)`, 809 gen_tac >> Induct 810 >- rw_tac list_ss [lookup_def,foldrWithKey_def] 811 >- (rw_tac list_ss [lookup_bin,regexp_compare_eq,foldrWithKey_def] 812 >> metis_tac [mem_acc,MEM])); 813 814val mem_get_accepts = Q.store_thm 815("mem_get_accepts", 816 `!bmap x. 817 invariant regexp_compare bmap ==> 818 (MEM x (get_accepts bmap) 819 <=> 820 ?r. nullable r /\ (lookup regexp_compare r bmap = SOME x))`, 821 rw_tac list_ss [get_accepts_def,mergesort_mem] >> metis_tac [lemma,lemma1,MEM]); 822 823 824(*---------------------------------------------------------------------------*) 825(* Correctness of regexp compiler *) 826(*---------------------------------------------------------------------------*) 827 828val fmap_inj_def = 829 Define 830 `fmap_inj cmp bmap = 831 !x y. x IN fdom cmp bmap /\ (lookup cmp x bmap = lookup cmp y bmap) 832 ==> (cmp x y = Equal)`; 833 834val extend_states_inv_def = 835 Define 836 `extend_states_inv next_state state_map table = 837 (invariant regexp_compare state_map /\ 838 fmap_inj regexp_compare state_map /\ 839 (frange regexp_compare state_map = count next_state) /\ 840 (!n. MEM n (MAP SND table) ��� n < next_state))`; 841 842val extend_states_inv = Q.prove ( 843`!next_state state_map table states next_state' state_map' table'. 844 (extend_states next_state state_map table states = (next_state',state_map',table')) ��� 845 extend_states_inv next_state state_map table 846 ��� 847 extend_states_inv next_state' state_map' table'`, 848 ho_match_mp_tac extend_states_ind 849 >> rw [extend_states_def] 850 >> BasicProvers.EVERY_CASE_TAC 851 >> fs [] 852 >> FIRST_X_ASSUM match_mp_tac 853 >> rw [] 854 >> fs [extend_states_inv_def, frange_def, MEM_MAP,EXTENSION,GSYM LEFT_FORALL_IMP_THM] 855 >> `good_cmp regexp_compare /\ eq_cmp regexp_compare` 856 by metis_tac [eq_cmp_regexp_compare,regexp_compare_good] 857 >> rw_tac set_ss [] 858 >- metis_tac [insert_thm] 859 >- (fs [fmap_inj_def,lookup_insert_thm] 860 >> rw [regexp_compare_eq,fdom_insert] 861 >> metis_tac [DECIDE ``x < (x:num) ==> F``,eq_cmp_def]) 862 >- (rw [lookup_insert_thm,regexp_compare_eq,EQ_IMP_THM] 863 >- (pop_assum mp_tac >> rw_tac set_ss [] 864 >> metis_tac [DECIDE ``x < x + 1n``,LESS_TRANS]) 865 >- (`x < next_state \/ (x=next_state)` by DECIDE_TAC 866 >> metis_tac [NOT_SOME_NONE])) 867 >- rw_tac set_ss [] 868 >- metis_tac [LESS_TRANS,DECIDE ``x < x+1n``,IN_DEF] 869 >- (rw_tac set_ss [] >> metis_tac[]) 870 >- metis_tac [LESS_TRANS,DECIDE ``x < x+1n``,IN_DEF]) 871 872val submap_def = 873 Define 874 `submap cmp t1 t2 = 875 !x. x IN fdom cmp t1 ==> x IN fdom cmp t2 /\ (lookup cmp x t1 = lookup cmp x t2)`; 876 877val submap_id = Q.prove 878(`!t cmp. submap cmp t t`, 879 rw [submap_def]); 880 881val submap_trans = Q.prove 882(`!t1 t2 t3 cmp. submap cmp t1 t2 /\ submap cmp t2 t3 ==> submap cmp t1 t3`, 883 rw [submap_def]); 884 885val submap_mono = Q.prove 886(`!t1 t2 k v cmp. 887 eq_cmp cmp /\ invariant cmp t2 /\ submap cmp t1 t2 888 /\ k NOTIN fdom cmp t1 889 ==> 890 submap cmp t1 (insert cmp k v t2)`, 891 rw [submap_def,fdom_insert] 892 >> res_tac 893 >> `good_cmp cmp` by metis_tac [eq_cmp_def] 894 >> rw [lookup_insert_thm] 895 >> `k=x` by metis_tac [eq_cmp_def] 896 >> rw [] 897 >> metis_tac[]); 898 899val submap_insert = Q.prove 900(`!bmap t cmp x v. 901 eq_cmp cmp /\ invariant cmp bmap /\ 902 x NOTIN fdom cmp bmap /\ 903 submap cmp (insert cmp x v bmap) t 904 ==> submap cmp bmap t`, 905 rw_tac set_ss [submap_def] 906 >> `invariant cmp (insert cmp x v bmap)` by metis_tac [insert_thm,eq_cmp_def] 907 >- (qpat_x_assum `$! M` (mp_tac o Q.ID_SPEC) >> rw_tac set_ss [fdom_insert]) 908 >- (`~(x' = x)` by (fs [fdom_def] >> metis_tac[]) 909 >> `fdom cmp (insert cmp x v bmap) x'` by rw [fdom_insert,IN_DEF] 910 >> `good_cmp cmp` by metis_tac [eq_cmp_def] 911 >> `lookup cmp x' (insert cmp x v bmap) = lookup cmp x' t` by metis_tac[] 912 >> pop_assum (SUBST_ALL_TAC o SYM) 913 >> pat_elim `$!M` 914 >> rw_tac set_ss [lookup_insert_thm] 915 >> metis_tac [eq_cmp_def])); 916 917val set_lem = Q.prove 918(`!x s t. x IN s ==> (s UNION t = s UNION (x INSERT t))`, 919 rw_tac set_ss [SET_EQ_THM] >> metis_tac []); 920 921val fapply_def = 922 Define 923 `fapply cmp bmap x = THE (lookup cmp x bmap)`; 924 925val extend_states_thm = Q.prove 926(`!next_state state_map table states next_state' state_map' table'. 927 (extend_states next_state state_map table states = (next_state',state_map',table')) 928 /\ invariant regexp_compare state_map 929 ��� 930 (fdom regexp_compare state_map' = (fdom regexp_compare state_map UNION set (MAP SND states))) ��� 931 submap regexp_compare state_map state_map' ��� 932 next_state ��� next_state' ��� 933 (table' = REVERSE (MAP (\(c,r). (c, fapply regexp_compare state_map' r)) states) ++ table) /\ 934 invariant regexp_compare state_map'` 935 , 936 `eq_cmp regexp_compare` by metis_tac [eq_cmp_regexp_compare] 937 >> ho_match_mp_tac extend_states_ind 938 >> rw [extend_states_def] 939 >> BasicProvers.EVERY_CASE_TAC 940 >> `invariant regexp_compare (insert regexp_compare r' next_state state_map)` 941 by metis_tac [insert_thm,eq_cmp_def] 942 >> fs [submap_id] 943 >> res_tac 944 >- (rw_tac set_ss [fdom_insert] >> metis_tac[]) 945 >- (`r' IN fdom regexp_compare state_map` by rw [fdom_def] 946 >> rw_tac set_ss [set_lem]) 947 >- (match_mp_tac submap_insert 948 >> rw [fdom_def] 949 >> metis_tac [NOT_SOME_NONE]) 950 >- decide_tac 951 >- (pat_elim `$! M` >> rw[] 952 >> qpat_x_assum `submap _ __ ___` mp_tac 953 >> rw_tac set_ss [submap_def,fapply_def] 954 >> pop_assum (mp_tac o Q.SPEC `r'`) 955 >> rw [fdom_insert] 956 >> pop_assum mp_tac 957 >> `good_cmp regexp_compare` by metis_tac [regexp_compare_good] 958 >> rw [lookup_insert_thm,regexp_compare_eq] 959 >> metis_tac [THE_DEF]) 960 >- (pat_elim `$! M` >> rw[] 961 >> qpat_x_assum `submap _ __ ___` mp_tac 962 >> rw_tac set_ss [submap_def,fapply_def] 963 >> pop_assum (mp_tac o Q.SPEC `r'`) 964 >> rw [fdom_def] 965 >> metis_tac [THE_DEF]) 966); 967 968val good_table_def = 969 Define 970 `good_table state_map table = 971 (ALL_DISTINCT (MAP FST table) ��� 972 (!n table2. 973 (ALOOKUP table n = SOME table2) 974 ��� 975 (set (MAP FST table2) = set ALPHABET)) ��� 976 (!n c n' table2. 977 (ALOOKUP table n = SOME table2) ��� 978 (ALOOKUP table2 c = SOME n') 979 ��� 980 MEM c ALPHABET ��� 981 ?r r'. 982 (lookup regexp_compare r state_map = SOME n) ��� 983 (lookup regexp_compare r' state_map = SOME n') ��� 984 (r' = smart_deriv c r)))`; 985 986val invar_def = Define `invar = invariant regexp_compare`; 987val dom_def = Define `dom = fdom regexp_compare`; 988val range_def = Define `range = frange regexp_compare`; 989val union_def = Define `union = ounion regexp_compare`; 990val set_def = Define `set = oset regexp_compare`; 991val image_def = Define `image = oimage regexp_compare`; 992val apply_def = Define `apply = fapply regexp_compare`; 993 994val Brz_invariant_def = 995 Define 996 `Brz_invariant seen todo (next_state,state_map,table) ��� 997 invar state_map /\ invar seen /\ 998 EVERY is_normalized todo ��� 999 (!r. mem_regexp r seen ��� is_normalized r) ��� 1000 (!r. r ��� dom state_map ��� is_normalized r) ��� 1001 (dom (union seen (set todo)) = dom state_map) ��� 1002 (range state_map = count next_state) ��� 1003 (set (MAP FST table) = fdom num_cmp (oimage num_cmp (apply state_map) seen)) ��� 1004 fmap_inj regexp_compare state_map ��� 1005 good_table state_map table`; 1006 1007val lem = Q.prove ( 1008`!b c d e f. Brz_invariant b c (d,e,f) ��� EVERY is_normalized c`, 1009 rw [Brz_invariant_def]); 1010 1011val fdom_ounion = Q.prove 1012(`!a b. good_cmp cmp /\ invariant cmp a /\ invariant cmp b 1013 ==> (fdom cmp (ounion cmp a b) = (fdom cmp a) UNION (fdom cmp b))`, 1014 rw[fdom_def,ounion_def,SET_EQ_THM] 1015 >> metis_tac [regexp_compare_good,good_oset_def, 1016 SIMP_RULE std_ss [oin_def,ounion_def, 1017 member_iff_lookup,oneTheory.one] oin_ounion]); 1018 1019val dom_union = Q.prove 1020(`!a b. invariant regexp_compare a /\ invariant regexp_compare b 1021 ==> (dom (union a b) = (dom a) UNION (dom b))`, 1022 metis_tac [regexp_compare_good,fdom_ounion,dom_def,union_def]) 1023 1024val invariant_oset = Q.store_thm 1025("invariant_oset", 1026 `!l. good_cmp cmp ==> invariant cmp (oset cmp l)`, 1027 simp_tac std_ss [oset_def] 1028 >> Induct 1029 >> fs [oempty_def,empty_def,oinsert_def] 1030 >- metis_tac [invariant_def] 1031 >- metis_tac [insert_thm]); 1032 1033val in_dom_oset = Q.store_thm 1034("in_dom_oset", 1035 `!l x. eq_cmp cmp ==> (x IN fdom cmp (oset cmp l) <=> MEM x l)`, 1036 Induct >> rw [] 1037 >- rw [oempty_def,empty_def,fdom_def,lookup_def] 1038 >- (`good_cmp cmp` by metis_tac [eq_cmp_def] 1039 >> imp_res_tac invariant_oset >> pop_assum (assume_tac o Q.SPEC `l`) 1040 >> rw [oset_def] 1041 >> rw [GSYM oset_def] 1042 >> rw [oinsert_def] 1043 >> rw_tac set_ss [fdom_insert])); 1044 1045val dom_set_append = Q.store_thm 1046("dom_set_append", 1047 `!l1 l2. dom (set (l1++l2)) = dom (set l1) UNION dom (set l2)`, 1048 rw[dom_def,set_def,EXTENSION] >> 1049 `eq_cmp regexp_compare` by metis_tac [eq_cmp_regexp_compare] >> 1050 rw [in_dom_oset]); 1051 1052val member_insert = Q.store_thm 1053("member_insert", 1054 `!bmap x y v. 1055 eq_cmp cmp /\ invariant cmp bmap ==> 1056 (member cmp x (insert cmp y v bmap) <=> (x=y) \/ member cmp x bmap)`, 1057 rw [member_iff_lookup,GSYM (SIMP_RULE set_ss [SET_EQ_THM] fdom_def)] >> 1058 rw [fdom_insert] >> metis_tac [IN_DEF]); 1059 1060val triv_lem = Q.prove(`!s1 s2. (s1 = s2) ==> ((s1 UNION s) = (s2 UNION s))`,rw[]); 1061 1062val dom_oset_lem = Q.prove 1063(`!l. eq_cmp cmp ==> (fdom cmp (oset cmp l) = LIST_TO_SET l)`, 1064 rw [EXTENSION,in_dom_oset,eq_cmp_regexp_compare]); 1065 1066val mem_foldrWithKey = Q.prove 1067(`!(bset:'a oset) acc a. eq_cmp cmp /\ invariant cmp bset ==> 1068 (MEM (a,()) (foldrWithKey (\k x xs. (k,())::xs) acc bset) 1069 <=> 1070 (a IN fdom cmp bset) \/ MEM (a,()) acc)`, 1071simp_tac set_ss [fdom_def] 1072 >> Induct >> rw [foldrWithKey_def,invariant_def] >> fs [] 1073 >- rw [lookup_def] 1074 >- (`good_cmp cmp` by metis_tac [eq_cmp_def] 1075 >> rw [lookup_bin,EQ_IMP_THM] 1076 >> metis_tac [key_greater_lookup,key_less_lookup,good_cmp_thm,eq_cmp_def])); 1077 1078val mem_foldrWithKey_lem = 1079 mem_foldrWithKey 1080 |> Q.SPEC `bset` 1081 |> Q.SPEC `[]` 1082 |> SIMP_RULE list_ss [] 1083 1084val left_to_right = Q.prove 1085(`eq_cmp cmp ==> 1086 !list bmap k v. invariant cmp bmap /\ 1087 (lookup cmp k (FOLDR (��(k,v) t. insert cmp k v t) bmap list) = SOME v) 1088 ==> 1089 (MEM (k,v) list \/ (lookup cmp k bmap = SOME v))`, 1090 strip_tac 1091 >> `good_cmp cmp` by metis_tac [eq_cmp_def] 1092 >> Induct 1093 >- rw [] 1094 >- (Cases_on `h` >> rw [] 1095 >> pop_assum mp_tac 1096 >> `invariant cmp (FOLDR (��(k,v) t. insert cmp k v t) bmap list)` 1097 by (Q.SPEC_TAC (`list`,`L`) 1098 >> Induct >> rw [invariant_def] 1099 >> Cases_on `h` >> rw [] >> metis_tac [insert_thm]) 1100 >> rw [lookup_insert_thm] 1101 >> metis_tac [eq_cmp_def])); 1102 1103val invariant_ffoldr = Q.prove 1104(`!list aset f. 1105 good_cmp cmp /\ invariant cmp aset 1106 ==> 1107 invariant cmp (FOLDR (��(k,v) t. insert cmp (f k) v t) aset list)`, 1108 Induct >> rw [invariant_def] 1109 >> Cases_on `h` >> rw [] >> metis_tac [insert_thm]); 1110 1111val invariant_ffoldr_inst = 1112 invariant_ffoldr 1113 |> INST_TYPE [beta |-> ``:unit``, gamma |-> beta] 1114 1115val left_to_right_alt = Q.prove 1116(`eq_cmp (cmp:'b->'b->ordering) 1117 ==> 1118 !(list :('a # unit) list) (bset :'b oset) x f. 1119 invariant cmp bset /\ 1120 (lookup cmp x (FOLDR (��(k,v:unit) t. insert cmp (f k) () t) bset list) = SOME ()) 1121 ==> 1122 (?a. MEM (a,()) list /\ (x = f a)) 1123 \/ 1124 ((!a. MEM (a,()) list ==> (x <> f a)) /\ (lookup cmp x bset = SOME ()))`, 1125 strip_tac 1126 >> `good_cmp cmp` by metis_tac [eq_cmp_def] 1127 >> Induct 1128 >- rw [] 1129 >- (Cases_on `h` >> rw [] >> fs [] 1130 >> pop_assum mp_tac 1131 >> `invariant cmp (FOLDR (��(k,v:unit) t. insert cmp (f k) () t) bset list)` 1132 by (Q.SPEC_TAC (`list`,`L`) 1133 >> Induct >> rw [invariant_def] 1134 >> Cases_on `h` >> rw [] >> metis_tac [insert_thm]) 1135 >> rw [lookup_insert_thm] 1136 >> metis_tac [eq_cmp_def])); 1137 1138val left_to_right_lem = 1139 left_to_right_alt 1140 |> Q.GEN `cmp` 1141 |> Q.SPEC `cmp2` 1142 |> UNDISCH 1143 |> Q.SPEC `foldrWithKey (��(k:'a) (x:unit) xs. (k,())::xs) [] aset` 1144 |> Q.SPEC `Tip` 1145 |> Q.SPEC `x` 1146 |> Q.SPEC `f` 1147 |> DISCH_ALL; 1148 1149val oin_fdom = Q.prove 1150(`!aset x. oin cmp x aset <=> x IN fdom cmp aset`, 1151 rw [fdom_def, oin_def, member_iff_lookup]); 1152 1153val mem_oin = Q.prove 1154(`!list x aset. 1155 eq_cmp cmp /\ invariant cmp aset /\ 1156 MEM (x,()) list 1157 ==> 1158 oin cmp (f x) (FOLDR (\(k,u) s. insert cmp (f k) () s) aset list)`, 1159 Induct >> rw [oin_fdom] 1160 >> `good_cmp cmp` by metis_tac [eq_cmp_def] 1161 >> mp_tac (SPEC_ALL invariant_ffoldr_inst) 1162 >| [all_tac, Cases_on `h`] 1163 >> rw[fdom_insert] 1164 >> metis_tac [oin_fdom,IN_DEF]); 1165 1166val mem_oin_inst = 1167 mem_oin 1168 |> SPEC_ALL 1169 |> Q.INST [`cmp` |-> `cmp2`, `aset` |-> `Tip`, `x` |-> `x'`] 1170 |> Q.GEN `list` 1171 1172val fdom_oimage = Q.store_thm 1173("fdom_image", 1174 `!aset:'a oset. 1175 eq_cmp (cmp1:'a->'a->ordering) /\ 1176 eq_cmp (cmp2:'b->'b->ordering) /\ invariant cmp1 aset 1177 ==> (fdom cmp2 (oimage cmp2 f aset) = {f x | oin cmp1 x aset})`, 1178simp_tac set_ss 1179 [oimage_def,map_keys_def,balanced_mapTheory.fromList_def,fdom_def, 1180 toAscList_def,empty_def, 1181 rich_listTheory.FOLDR_MAP,LAMBDA_PROD,oneTheory.one] 1182 >> rw [EQ_IMP_THM] 1183 >- (mp_tac left_to_right_lem >> rw[invariant_def] 1184 >- (qexists_tac `a` >> rw[] 1185 >> pat_elim `lookup _ __ ___ = SOME ()` 1186 >> pat_elim `eq_cmp cmp2` 1187 >> `a IN fdom cmp1 aset` by metis_tac [mem_foldrWithKey_lem] 1188 >> pop_assum mp_tac 1189 >> rw[oin_def,member_iff_lookup,fdom_def]) 1190 >- fs [lookup_def]) 1191 >- (`MEM (x',()) (foldrWithKey (��k x xs. (k,())::xs) [] aset)` 1192 by metis_tac [oin_fdom,IN_DEF, mem_foldrWithKey_lem] 1193 >> mp_tac mem_oin_inst >> rw [invariant_def] 1194 >> res_tac 1195 >> fs [oin_def,member_iff_lookup,oneTheory.one]) 1196); 1197 1198val fdom_oimage_rw = 1199 fdom_oimage 1200 |> SIMP_RULE set_ss [GSPECIFICATION_applied]; 1201 1202val fdom_oimage_insert = Q.prove 1203(`!bset a f cmp1 cmp2. 1204 eq_cmp (cmp1:'a->'a->ordering) /\ 1205 eq_cmp (cmp2:'b->'b->ordering) /\ invariant cmp1 bset 1206 ==> (fdom cmp2 (oimage cmp2 f (oinsert cmp1 a bset)) 1207 = 1208 ((f a) INSERT fdom cmp2 (oimage cmp2 f bset)))`, 1209rpt strip_tac 1210 >> `invariant cmp1 (oinsert cmp1 a bset)` by metis_tac [insert_thm, eq_cmp_def,oinsert_def] 1211 >> mp_tac fdom_oimage_rw >> asm_simp_tac bool_ss[] 1212 >> rw_tac set_ss [] 1213 >> `good_cmp cmp1 /\ good_cmp cmp2` by metis_tac [eq_cmp_def] 1214 >> rw [oin_def, member_iff_lookup, oinsert_def] 1215 >> fs [eq_cmp_def,lookup_insert_thm] 1216 >> metis_tac[]); 1217 1218 1219val fdom_oimage_inst = 1220 fdom_oimage 1221 |> INST_TYPE [alpha |-> ``:regexp``, beta |-> ``:num``] 1222 |> Q.INST [`cmp1` |-> `regexp_compare`, `cmp2` |-> `num_cmp`] 1223 |> SIMP_RULE std_ss [eq_cmp_regexp_compare,num_cmp_good,num_cmp_antisym,eq_cmp_def] 1224 1225val Brz_inv_pres = Q.prove ( 1226`!todo1 todos seen next_state state_map table. 1227 Brz_invariant seen (todo1::todos) (next_state,state_map,table) ��� 1228 ~mem_regexp todo1 seen 1229 ��� 1230 Brz_invariant (insert_regexp todo1 seen) 1231 (MAP SND (transitions todo1) ++ todos) 1232 (build_table (transitions todo1) 1233 todo1 (next_state,state_map,table))`, 1234 rw_tac list_ss [good_table_def,insert_regexp_def, invar_def, 1235 Brz_invariant_def, build_table_def, transitions_def,set_def] >> 1236 imp_res_tac extend_states_thm >> 1237 `todo1 IN dom state_map` 1238 by (qpat_x_assum `dom (union a b) = dom state_map` (mp_tac o SYM) >> rw [] >> 1239 `invariant regexp_compare 1240 (oset regexp_compare (todo1::todos))` by metis_tac [invariant_oset,regexp_compare_good] 1241 >> rw [dom_union] 1242 >> disj2_tac 1243 >> `eq_cmp regexp_compare` by metis_tac [eq_cmp_regexp_compare] 1244 >> rw [dom_def,in_dom_oset]) 1245 >> 1246 `todo1 IN dom state_map'` by metis_tac [submap_def,dom_def] 1247 >> 1248 pop_assum (strip_assume_tac o SIMP_RULE set_ss [dom_def, fdom_def]) >> rw [] >> 1249 rw [Brz_invariant_def, good_table_def,invar_def] 1250 >- metis_tac [insert_thm,regexp_compare_good] 1251 >- (rw [EVERY_MAP] >> Q.SPEC_TAC (`ALPHABET`,`alphabet`) 1252 >> Induct >> rw [smart_deriv_normalized]) 1253 >- metis_tac [member_insert,mem_regexp_def,eq_cmp_regexp_compare,insert_thm,eq_cmp_def] 1254 >- (`r IN fdom regexp_compare state_map \/ 1255 r IN set (MAP SND (MAP (��c. (c,smart_deriv c todo1)) ALPHABET))` 1256 by metis_tac [EXTENSION,dom_def,IN_UNION] 1257 >- metis_tac [dom_def] 1258 >- (fs [MAP_MAP_o,MEM_MAP] >> metis_tac [smart_deriv_normalized, dom_def])) 1259 >- (fs [GSYM dom_def] >> qpat_x_assum `dom _ = dom __` (assume_tac o SYM) 1260 >> rw [GSYM set_def] 1261 >> `invariant regexp_compare (insert regexp_compare todo1 () seen)` 1262 by metis_tac [insert_thm,regexp_compare_good] 1263 >> `invariant regexp_compare 1264 (set (MAP SND (MAP (��c. (c,smart_deriv c todo1)) ALPHABET) ++ todos))` 1265 by metis_tac [invariant_oset, set_def,regexp_compare_good] 1266 >> `invariant regexp_compare (set (todo1::todos))` 1267 by metis_tac [invariant_oset, set_def, regexp_compare_good] 1268 >> rw [dom_union] 1269 >> rw_tac bool_ss [Once CONS_APPEND,dom_set_append] 1270 >> `dom (insert regexp_compare todo1 () seen) = 1271 dom (set [todo1]) UNION dom seen` 1272 by (rw [dom_def,fdom_insert,eq_cmp_regexp_compare,Once INSERT_SING_UNION] 1273 >> match_mp_tac triv_lem 1274 >> rw_tac bool_ss [EXTENSION,set_def] 1275 >> rw [in_dom_oset,eq_cmp_regexp_compare]) 1276 >> rw [] 1277 >> rw [AC UNION_COMM UNION_ASSOC] 1278 >> metis_tac [dom_oset_lem,eq_cmp_regexp_compare,dom_def,set_def]) 1279 >- (`extend_states_inv next_state state_map ([]:(num,num) alist)` 1280 by (rw [extend_states_inv_def] >> metis_tac [range_def]) 1281 >> imp_res_tac extend_states_inv 1282 >> fs [extend_states_inv_def, count_def, EXTENSION, range_def]) 1283 >- (`todo1 IN dom state_map'` by (fs[submap_def] >> metis_tac[dom_def]) 1284 >> `eq_cmp num_cmp /\ eq_cmp regexp_compare` 1285 by metis_tac[eq_cmp_regexp_compare,num_cmp_good,num_cmp_antisym,eq_cmp_def] 1286 >> rw [GSYM oinsert_def,fdom_oimage_insert] 1287 >> `apply state_map' todo1 = x` by rw [apply_def,fapply_def] 1288 >> pop_assum SUBST_ALL_TAC 1289 >> AP_TERM_TAC 1290 >> `dom seen SUBSET dom state_map` by 1291 (qpat_x_assum `dom _ = dom state_map` (SUBST_ALL_TAC o SYM) 1292 >> metis_tac [invariant_oset, eq_cmp_def,dom_union,SUBSET_UNION]) 1293 >> rw[fdom_oimage_inst,SET_EQ_THM,EQ_IMP_THM,oin_fdom] 1294 >> Q.EXISTS_TAC `x''` >> rw[] 1295 >> `x'' IN dom state_map` by metis_tac [SUBSET_DEF,dom_def] 1296 >> `(x'' IN dom state_map') /\ 1297 (lookup regexp_compare x'' state_map = lookup regexp_compare x'' state_map')` 1298 by metis_tac [submap_def,dom_def] 1299 >> rw [apply_def,fapply_def]) 1300 >- (`extend_states_inv next_state state_map ([]:(num,num) alist)` 1301 by (rw [extend_states_inv_def] >> metis_tac [range_def]) 1302 >> imp_res_tac extend_states_inv 1303 >> pop_assum mp_tac 1304 >> rw [extend_states_inv_def]) 1305 >- (rw_tac set_ss [fdom_oimage_inst,GSYM IMP_DISJ_THM,oneTheory.one, 1306 oin_fdom,fdom_def,apply_def,fapply_def] 1307 >> strip_tac 1308 >> `fmap_inj regexp_compare state_map'` 1309 by (`extend_states_inv next_state state_map ([]:(num,num) alist)` 1310 by (rw [extend_states_inv_def] >> metis_tac [range_def]) 1311 >> imp_res_tac extend_states_inv 1312 >> pop_assum mp_tac 1313 >> rw [extend_states_inv_def]) 1314 >> `dom seen SUBSET dom state_map` by 1315 (`eq_cmp num_cmp /\ eq_cmp regexp_compare` 1316 by metis_tac[eq_cmp_regexp_compare, 1317 num_cmp_good,num_cmp_antisym,eq_cmp_def] 1318 >> qpat_x_assum `dom _ = dom state_map` (SUBST_ALL_TAC o SYM) 1319 >> metis_tac [invariant_oset, eq_cmp_def,dom_union,SUBSET_UNION]) 1320 >> pop_assum mp_tac 1321 >> rw_tac set_ss [dom_def, fdom_def, member_iff_lookup,SUBSET_DEF,oneTheory.one] 1322 >> Q.EXISTS_TAC `x'` >> rw[] 1323 >> strip_tac 1324 >> `x' IN fdom regexp_compare state_map` by fs [SUBSET_DEF,dom_def, fdom_def] 1325 >> `(x' IN fdom regexp_compare state_map') /\ 1326 (lookup regexp_compare x' state_map = lookup regexp_compare x' state_map')` 1327 by (fs [submap_def,fdom_def, member_iff_lookup] >> metis_tac []) 1328 >> fs [] 1329 >> `x' <> todo1` by (fs[mem_regexp_def,member_iff_lookup] >> metis_tac[]) 1330 >> metis_tac [fmap_inj_def,eq_cmp_regexp_compare, eq_cmp_def]) 1331 >- (pop_assum mp_tac 1332 >> rw[MAP_MAP_o,combinTheory.o_DEF] 1333 >> fs[MAP_REVERSE,MAP_MAP_o,combinTheory.o_DEF] 1334 >> metis_tac[]) 1335 >- (qpat_x_assum `(if _ then __ else ___) = ____` mp_tac 1336 >> rw[MAP_MAP_o,combinTheory.o_DEF] 1337 >- (imp_res_tac alistTheory.ALOOKUP_MEM 1338 >> fs [MEM_REVERSE,MEM_MAP]) 1339 >- metis_tac[]) 1340 >- (qpat_x_assum `(if _ then __ else ___) = ____` mp_tac 1341 >> rw[MAP_MAP_o,combinTheory.o_DEF] 1342 >- (qexists_tac `todo1` >> rw[] 1343 >> imp_res_tac alistTheory.ALOOKUP_MEM 1344 >> pop_assum mp_tac 1345 >> rw_tac set_ss [MEM_REVERSE,MEM_MAP,fapply_def] 1346 >> `smart_deriv c todo1 IN fdom regexp_compare state_map'` 1347 by (rw [MAP_MAP_o,combinTheory.o_DEF,MEM_MAP] 1348 >> disj2_tac 1349 >> qexists_tac `c` >> rw[] 1350 >> metis_tac [IN_DEF]) 1351 >> fs [fdom_def, member_iff_lookup]) 1352 >- (fs[submap_def, fdom_def, member_iff_lookup] >> metis_tac[])) 1353); 1354 1355 1356val Brz_invariant_alt = Q.prove 1357(`!seen worklist worklist' acc. 1358 (LIST_TO_SET worklist = LIST_TO_SET worklist') 1359 ==> 1360 (Brz_invariant seen worklist acc = Brz_invariant seen worklist' acc)`, 1361rpt strip_tac >> PairCases_on `acc` 1362 >> rw[Brz_invariant_def,EQ_IMP_THM] 1363 >> ((qpat_x_assum `EVERY _ __` mp_tac >> 1364 qpat_x_assum `LIST_TO_SET _ = LIST_TO_SET __` mp_tac 1365 >> rw [EVERY_MEM] 1366 >> NO_TAC) 1367 ORELSE 1368 (qpat_x_assum `dom _ = dom __` (SUBST_ALL_TAC o SYM) 1369 >> fs [invar_def] 1370 >> `invariant regexp_compare (set worklist') /\ 1371 invariant regexp_compare (set worklist)` 1372 by metis_tac [invariant_oset,set_def, regexp_compare_good] 1373 >> rw [dom_union,EXTENSION] 1374 >> `eq_cmp regexp_compare` by metis_tac [eq_cmp_regexp_compare] 1375 >> rw [in_dom_oset,dom_def,set_def]))); 1376 1377val Brz_inv_thm = Q.prove 1378(`!seen worklist acc. 1379 dom_Brz seen worklist acc 1380 ==> 1381 !seen' acc'. 1382 Brz_invariant seen worklist acc ��� 1383 (Brzozowski seen worklist acc = (seen',acc')) 1384 ==> 1385 Brz_invariant seen' [] acc'`, 1386 ho_match_mp_tac Brzozowski_ind 1387 >> rw [] 1388 >> qpat_x_assum `Brzozowski _ __ ___ = _4` mp_tac 1389 >> rw [Brzozowski_eqns] 1390 >> pop_assum mp_tac 1391 >> CASE_TAC 1392 >> rw[] 1393 >- metis_tac[] 1394 >- (rfs [] 1395 >> first_assum match_mp_tac 1396 >> qpat_x_assum `Brz_invariant seen (h::t) args` mp_tac 1397 >> PairCases_on `acc` 1398 >> rw [Brz_invariant_def] 1399 >> qpat_x_assum `dom _ = dom __` (SUBST_ALL_TAC o SYM) 1400 >> fs [invar_def] 1401 >> `invariant regexp_compare (set t) /\ 1402 invariant regexp_compare (set (h::t))` 1403 by metis_tac [invariant_oset,set_def, regexp_compare_good] 1404 >> rw [dom_union,EXTENSION] 1405 >> `eq_cmp regexp_compare` by metis_tac [eq_cmp_regexp_compare] 1406 >> rw [in_dom_oset,dom_def,set_def] 1407 >> fs [mem_regexp_def, member_iff_lookup] 1408 >> rw [fdom_def] 1409 >> metis_tac[]) 1410 >- (rfs [LET_THM] 1411 >> rw[] 1412 >> pat_elim `_ = __` 1413 >> PairCases_on `acc` 1414 >> imp_res_tac Brz_inv_pres 1415 >> `LIST_TO_SET (remove_dups (MAP SND (transitions h)) ++ t) = 1416 LIST_TO_SET (MAP SND (transitions h) ++ t)` 1417 by rw [EXTENSION,remove_dups_mem] 1418 >> metis_tac [Brz_invariant_alt])) 1419 1420val Brz_mono = Q.prove 1421(`!seen worklist acc. 1422 dom_Brz seen worklist acc 1423 ==> 1424 !seen' acc'. 1425 Brz_invariant seen worklist acc /\ 1426 (Brzozowski seen worklist acc = (seen',acc')) 1427 ==> 1428 submap regexp_compare (FST (SND acc)) (FST(SND acc'))`, 1429 ho_match_mp_tac Brzozowski_ind 1430 >> Cases_on `worklist` 1431 >- (rw [] 1432 >> qpat_x_assum `Brzozowski _ __ ___ = _4` mp_tac 1433 >> rw [Brzozowski_eqns,submap_id]) 1434 >- (rw [] 1435 >> qpat_x_assum `Brzozowski _ __ ___ = _4` mp_tac 1436 >> rw [Brzozowski_eqns] 1437 >> fs[LET_THM] 1438 >- (first_assum match_mp_tac 1439 >> qpat_x_assum `Brz_invariant seen (h::t) acc` mp_tac 1440 >> PairCases_on `acc` 1441 >> rw [Brz_invariant_def] 1442 >> qpat_x_assum `dom _ = dom __` (SUBST_ALL_TAC o SYM) 1443 >> fs [invar_def] 1444 >> `invariant regexp_compare (set t) /\ 1445 invariant regexp_compare (set (h::t))` 1446 by metis_tac [invariant_oset,set_def, regexp_compare_good] 1447 >> rw [dom_union,EXTENSION] 1448 >> `eq_cmp regexp_compare` by metis_tac [eq_cmp_regexp_compare] 1449 >> rw [in_dom_oset,dom_def,set_def] 1450 >> fs [mem_regexp_def, member_iff_lookup] 1451 >> rw [fdom_def] 1452 >> metis_tac[]) 1453 >- (rw[] 1454 >> pat_elim `_ = __` 1455 >> PairCases_on `acc` 1456 >> imp_res_tac Brz_inv_pres 1457 >> `LIST_TO_SET (remove_dups (MAP SND (transitions h)) ++ t) = 1458 LIST_TO_SET (MAP SND (transitions h) ++ t)` 1459 by rw [EXTENSION,remove_dups_mem] 1460 >> imp_res_tac Brz_invariant_alt 1461 >> fs[] 1462 >> NTAC 3 (pop_assum (K ALL_TAC)) 1463 >> match_mp_tac submap_trans 1464 >> qexists_tac `FST (SND (build_table (transitions h) h (acc0,acc1,acc2)))` 1465 >> rw[build_table_def,LET_THM] 1466 >> `?x state_map'' z. extend_states acc0 acc1 [] (transitions h) = (x,state_map'',z)` 1467 by metis_tac [pair_CASES] 1468 >> fs[] 1469 >> `invariant regexp_compare acc1` 1470 by metis_tac [Brz_invariant_def, invar_def] 1471 >> imp_res_tac extend_states_thm 1472 >> rw [] 1473 >> CASE_TAC 1474 >> fs [] 1475 >> match_mp_tac submap_mono 1476 >> rw[eq_cmp_regexp_compare] 1477 >> qpat_x_assum `submap _ __ ___` mp_tac 1478 >> rw [submap_def] 1479 >> strip_tac 1480 >> res_tac 1481 >> fs [fdom_def] 1482 >> metis_tac [NOT_SOME_NONE]))); 1483 1484(*---------------------------------------------------------------------------*) 1485(* table_lang *) 1486(*---------------------------------------------------------------------------*) 1487 1488val table_lang_def = 1489 Define 1490 `(table_lang final_states table n [] = MEM n final_states) /\ 1491 (table_lang final_states table n (c::t) = 1492 case ALOOKUP table n of 1493 | NONE => F 1494 | SOME table2 => 1495 case ALOOKUP table2 c of 1496 | NONE => F 1497 | SOME n' => table_lang final_states table n' t)`; 1498 1499val table_lang_correct = Q.prove 1500(`!finals table state_map. 1501 fmap_inj regexp_compare state_map ��� 1502 good_table state_map table ��� 1503 (set (MAP FST table) = frange regexp_compare state_map) ��� 1504 (!n r. (lookup regexp_compare r state_map = SOME n) ��� (MEM n finals ��� nullable r)) 1505 ��� 1506 !n r s. 1507 (!c. MEM c s ��� MEM c ALPHABET) ��� 1508 (lookup regexp_compare r state_map = SOME n) 1509 ��� 1510 (table_lang finals table n s ��� smart_deriv_matches r (MAP CHR s))`, 1511 rpt gen_tac >> 1512 strip_tac >> 1513 Induct_on `s` >> 1514 rw [table_lang_def, smart_deriv_matches_def] >> 1515 fs [good_table_def] >> 1516 BasicProvers.EVERY_CASE_TAC >> 1517 rw [] 1518 >- (imp_res_tac alistTheory.ALOOKUP_NONE 1519 >> fs [EXTENSION] 1520 >> `n NOTIN frange regexp_compare state_map` by metis_tac[] 1521 >> fs [frange_def]) 1522 >- metis_tac [alistTheory.ALOOKUP_NONE] 1523 >- (`���r'.(lookup regexp_compare r' state_map = SOME n) /\ 1524 (lookup regexp_compare (smart_deriv h r') state_map = 1525 SOME x')` by metis_tac [] 1526 >> `table_lang finals table x' s <=> 1527 smart_deriv_matches (smart_deriv h r') (MAP CHR s)` 1528 by metis_tac[] 1529 >> pop_assum SUBST_ALL_TAC 1530 >> `r' = r` 1531 by (qpat_x_assum `fmap_inj _ __` mp_tac 1532 >> rw_tac set_ss [fmap_inj_def,fdom_def,regexp_compare_eq]) 1533 >> rw [] 1534 >> metis_tac [ORD_CHR_lem,mem_alphabet])) 1535 1536val vec_lang_lem1 = Q.prove 1537(`���(n:num). MEM n finals_list ��� 1538 (ALOOKUP (MAP (��x. (x,T)) finals_list) n = SOME T)`, 1539 rw [EQ_IMP_THM] 1540 >- rw [alistTheory.ALOOKUP_TABULATE] 1541 >- (`MEM (n,T) (MAP (��x. (x,T)) finals_list)` by METIS_TAC [alistTheory.ALOOKUP_MEM] 1542 >> fs[MEM_MAP])); 1543 1544val vec_lang_lem2 = 1545 alistTheory.alookup_stable_sorted 1546 |> Q.INST_TYPE [`:'a` |-> `:num`, `:'b` |-> `:(num, 'a) alist`] 1547 |> Q.SPECL [`$<=`, `mergesort`, `n`, `table`] 1548 |> SIMP_RULE (srw_ss()++ARITH_ss) [transitive_def, total_def, mergesort_STABLE_SORT]; 1549 1550(* Following are a bit suss because of use of ORD *) 1551val vec_lang_lem3 = 1552 alistTheory.alookup_stable_sorted 1553 |> Q.INST_TYPE [`:'a` |-> `:num`, `:'b` |-> `:'a option`] 1554 |> Q.SPECL [`$<=`, `mergesort`, `n`, `MAP (��(c,n). (c,SOME n)) table2`] 1555 |> SIMP_RULE (srw_ss()++ARITH_ss) 1556 [transitive_def, total_def, mergesort_STABLE_SORT, stringTheory.char_le_def]; 1557 1558val vec_lang_lem4 = Q.prove ( 1559`!l x. ALOOKUP (MAP (��(c,n). (c,SOME n)) l) x = 1560 OPTION_MAP SOME (ALOOKUP l x)`, 1561 Induct_on `l` >> 1562 rw [] >> 1563 PairCases_on `h` >> 1564 rw [] >> 1565 fs [stringTheory.ORD_11]); 1566 1567val leq_thm = 1568 let val leq = numSyntax.leq_tm 1569 in Q.prove (`transitive ^leq ��� total ^leq ��� antisymmetric ^leq`, 1570 srw_tac [ARITH_ss] [transitive_def, total_def, antisymmetric_def]) 1571 end; 1572 1573val length_mergesort = Q.prove 1574(`!l R. LENGTH (mergesort R l) = LENGTH l`, 1575 metis_tac[mergesort_perm,PERM_LENGTH]); 1576 1577val table_to_vec_thm = Q.prove ( 1578`!table final_states max_char max_state table' final_states'. 1579 (max_char = alphabet_size) /\ 1580 SORTED $<= final_states /\ 1581 (!x. MEM x final_states ��� x < max_state) ��� 1582 (!n l c. (ALOOKUP table n = SOME l) ��� (?x. ALOOKUP l c = SOME x) ��� c < max_char) ��� 1583 PERM (MAP FST table) (COUNT_LIST (LENGTH table)) ��� 1584 (table_to_vectors table = table') /\ 1585 (accepts_to_vector final_states max_state = final_states') 1586 ��� 1587 (LENGTH table' = LENGTH table) ��� 1588 (LENGTH final_states' = max_state) ��� 1589 (!n. MEM n final_states ��� n < LENGTH final_states' ��� EL n final_states') ��� 1590 (!n. (?l. ALOOKUP table n = SOME l) ��� n < LENGTH table') ��� 1591 (!n l. n < LENGTH table' ��� (LENGTH (EL n table') = max_char)) ��� 1592 (!n c x. 1593 (?l. (ALOOKUP table n = SOME l) ��� (ALOOKUP l c = SOME x)) ��� 1594 n < LENGTH table' ��� c < LENGTH (EL n table') ��� (EL c (EL n table') = SOME x))` 1595 , 1596 simp [good_table_def, table_to_vectors_def,accepts_to_vector_def] >> 1597 rpt gen_tac >> 1598 strip_tac >> 1599(* `FINITE final_states` by metis_tac [finite_bounded] >> *) 1600 qabbrev_tac `sorted_table = mergesort (inv_image $<= FST) table` >> 1601 `LENGTH sorted_table = LENGTH table` by metis_tac [mergesort_perm, PERM_LENGTH] >> 1602 `SORTS mergesort (inv_image $<= (FST : num # (num,'a) alist -> num))` 1603 by metis_tac [mergesort_STABLE_SORT, STABLE_DEF, 1604 leq_thm, total_inv_image, transitive_inv_image] >> 1605 `MAP FST sorted_table = COUNT_LIST (LENGTH table)` 1606 by (match_mp_tac sorted_perm_count_list >> 1607 fs [SORTS_DEF] >> 1608 rw [] >> 1609 metis_tac [PERM_TRANS, PERM_SYM, PERM_MAP]) >> 1610 simp [] >> 1611 conj_tac 1612 >- metis_tac[mergesort_perm,PERM_LENGTH] >> 1613 conj_tac 1614 >- (`SORTED $<= (MAP FST (MAP (\x.(x,T)) final_states))` 1615 by rw [MAP_MAP_o, combinTheory.o_DEF] 1616 >> metis_tac [alist_to_vec_thm]) >> 1617 conj_tac 1618 >- (`SORTED $<= (MAP FST (MAP (\x.(x,T)) final_states))` 1619 by rw [MAP_MAP_o, combinTheory.o_DEF] 1620 >> `LENGTH (alist_to_vec (MAP (��x. (x,T)) final_states) 1621 F max_state max_state) = max_state` 1622 by METIS_TAC [alist_to_vec_thm] 1623 >> POP_ASSUM SUBST_ALL_TAC 1624 >> RW_TAC std_ss [EQ_IMP_THM] 1625 >- (`n < max_state` by METIS_TAC [] >> 1626 `ALOOKUP (MAP (��x. (x,T)) final_states) n = SOME T` 1627 by METIS_TAC [vec_lang_lem1] >> METIS_TAC [alist_to_vec_thm]) 1628 >- (CCONTR_TAC >> fs [] 1629 >> Cases_on `ALOOKUP (MAP (��x. (x,T)) final_states) n` 1630 >- (metis_tac [alist_to_vec_thm, vec_lang_lem1]) 1631 >- (NTAC 2 (POP_ASSUM MP_TAC) >> rw [vec_lang_lem1] 1632 >> DISCH_TAC >> fs[] >> rw[] >> metis_tac [alist_to_vec_thm]))) 1633 >> 1634 conj_asm1_tac 1635 >- (rw [] >> eq_tac >> rw [] 1636 >- (imp_res_tac alistTheory.ALOOKUP_MEM >> 1637 imp_res_tac MEM_PERM >> 1638 fs [MEM_MAP, MEM_COUNT_LIST] >> 1639 metis_tac [FST,mergesort_perm,PERM_LENGTH]) 1640 >- (`ALOOKUP sorted_table n = SOME (EL n (MAP SND sorted_table))` 1641 by metis_tac [dense_alist_to_vec_thm, LENGTH_COUNT_LIST,mergesort_perm,PERM_LENGTH] >> 1642 `ALOOKUP table n = SOME (EL n (MAP SND sorted_table))` 1643 by metis_tac [vec_lang_lem2] >> 1644 rw [])) >> 1645 `SORTS mergesort (inv_image $<= (FST : num # 'a option -> num))` 1646 by metis_tac [mergesort_STABLE_SORT, STABLE_DEF, leq_thm, 1647 total_inv_image, transitive_inv_image] >> 1648 `(��a b:num. a ��� b) = $<=` by metis_tac [] >> pop_assum SUBST_ALL_TAC >> 1649 simp [EL_MAP,length_mergesort] >> 1650 conj_tac 1651 >- (rw [] >> 1652 `?n' table2'. EL n sorted_table = (n',table2')` by metis_tac [pair_CASES] >> 1653 rw [] >> 1654 qabbrev_tac `sorted_table2 = 1655 mergesort (inv_image $<= FST) (MAP (��(c,n). (c,SOME n)) table2')` >> 1656 qabbrev_tac `table2 = EL n (MAP SND sorted_table)` >> 1657 `SORTED $<= (MAP FST sorted_table2)` 1658 by (fs [SORTS_DEF, sorted_map, leq_thm] >> 1659 metis_tac []) >> 1660 metis_tac [alist_to_vec_thm]) 1661 >- (* last conjunct *) 1662 (fs [length_mergesort] >> 1663 rw [] >> eq_tac >> strip_tac >> 1664 res_tac >> 1665 simp [EL_MAP] >> 1666 `?n' table2'. EL n sorted_table = (n',table2')` by metis_tac [pair_CASES] >> 1667 rw [] >> 1668 qabbrev_tac `sorted_table2 = 1669 mergesort (inv_image $<= FST) (MAP (��(c,n). (c,SOME n)) table2')` >> 1670 qabbrev_tac `table2 = EL n (MAP SND sorted_table)` >> 1671 `SORTED $<= (MAP FST sorted_table2)` 1672 by (fs [SORTS_DEF, sorted_map, leq_thm] >> 1673 metis_tac []) 1674 >- metis_tac [alist_to_vec_thm] 1675 >- (imp_res_tac alist_to_vec_thm >> 1676 fs [AND_IMP_INTRO] >> 1677 FIRST_X_ASSUM match_mp_tac >> 1678 rw [] >> 1679 `table2' = table2` 1680 by (UNABBREV_ALL_TAC >> 1681 rw [EL_MAP]) >> 1682 rw [] >> 1683 `ALOOKUP sorted_table n = SOME (EL n (MAP SND sorted_table))` 1684 by metis_tac [dense_alist_to_vec_thm, LENGTH_COUNT_LIST] >> 1685 `ALOOKUP table n = SOME (EL n (MAP SND sorted_table))` 1686 by metis_tac [vec_lang_lem2] >> 1687 rw [] >> 1688 fs [] >> 1689 `ALOOKUP (MAP (��(c,n). (c,SOME n)) table2) c = SOME (SOME x)` 1690 by metis_tac [vec_lang_lem4, OPTION_MAP_DEF] >> 1691 fs [Once (GSYM vec_lang_lem3)] >> 1692 metis_tac [NOT_SOME_NONE, alist_to_vec_thm]) 1693 >- (REV_FULL_SIMP_TAC std_ss [EL_MAP] >> 1694 `table2' = table2` 1695 by (UNABBREV_ALL_TAC >> 1696 rw [EL_MAP]) >> 1697 fs [] >> 1698 res_tac >> 1699 simp [] >> 1700 Cases_on `ALOOKUP sorted_table2 c` 1701 >- (`ALOOKUP (MAP (��(c,n). (c,SOME n)) table2) c = NONE` 1702 by metis_tac [vec_lang_lem4, OPTION_MAP_DEF, vec_lang_lem3] >> 1703 metis_tac [NOT_SOME_NONE, alist_to_vec_thm]) 1704 >- (`ALOOKUP sorted_table n = SOME (EL n (MAP SND sorted_table))` 1705 by metis_tac [dense_alist_to_vec_thm, LENGTH_COUNT_LIST] >> 1706 `ALOOKUP table n = SOME (EL n (MAP SND sorted_table))` 1707 by metis_tac [vec_lang_lem2] >> 1708 rw [] >> 1709 REV_FULL_SIMP_TAC std_ss [EL_MAP] >> 1710 `SOME x = x'` by metis_tac [SOME_11, alist_to_vec_thm] >> 1711 simp [] >> 1712 qunabbrev_tac `sorted_table2` >> 1713 fs [Once vec_lang_lem3] >> 1714 fs [vec_lang_lem4]))) 1715); 1716 1717 1718val Brz_invariant_final = Q.prove ( 1719`!seen next_state state_map table. 1720 Brz_invariant seen [] (next_state,state_map,table) 1721 ��� 1722 (next_state = LENGTH table) ��� 1723 PERM (MAP FST table) (COUNT_LIST (LENGTH table)) ��� 1724 (���x. MEM x (get_accepts state_map) ==> x < LENGTH table) ��� 1725 (���n l c. (ALOOKUP table n = SOME l) ��� (���x. ALOOKUP l c = SOME x) ��� MEM c ALPHABET)` 1726 , 1727 simp [Brz_invariant_def,invar_def,set_def,ounion_oempty,union_def] >> 1728 rpt gen_tac >> strip_tac >> 1729 conj_asm1_tac 1730 >- (`!x. x ��� set (MAP FST table) ��� x < next_state` 1731 by (fs [count_def, EXTENSION, range_def, frange_def,dom_def] 1732 >> rw [fdom_oimage_inst,apply_def,oin_fdom] 1733 >> fs [fdom_def,fapply_def] 1734 >> metis_tac [THE_DEF]) >> 1735 `LENGTH table = CARD (set (MAP FST table))` 1736 by metis_tac [LENGTH_MAP,ALL_DISTINCT_CARD_LIST_TO_SET, good_table_def] >> 1737 `fdom num_cmp (oimage num_cmp (apply state_map) seen) = range state_map` 1738 by (simp_tac set_ss [range_def, frange_def] >> 1739 qpat_x_assum `dom _ = dom __` mp_tac >> 1740 rw [fdom_oimage_inst,apply_def, oin_fdom,fapply_def, dom_def] >> 1741 simp_tac set_ss [fdom_def] >> metis_tac[THE_DEF]) 1742 >> rw []) 1743>> conj_asm1_tac 1744 >- (match_mp_tac PERM_ALL_DISTINCT 1745 >> rw [all_distinct_count_list, MEM_COUNT_LIST] 1746 >- fs [good_table_def] 1747 >- (`fdom num_cmp (oimage num_cmp (apply state_map) seen) = range state_map` 1748 by (simp_tac set_ss [range_def, frange_def] >> 1749 qpat_x_assum `dom _ = dom __` mp_tac >> 1750 rw [fdom_oimage_inst,apply_def, oin_fdom,fapply_def, dom_def] >> 1751 simp_tac set_ss [fdom_def] >> metis_tac[THE_DEF]) 1752 >> rw[])) 1753>> conj_tac 1754 >- (rw [mem_get_accepts] 1755 >> `x IN range state_map` 1756 by (simp_tac set_ss [range_def, frange_def] >> metis_tac[]) 1757 >> rfs [count_def] ) 1758 >- (rw [] >> fs [good_table_def] >> metis_tac []) 1759); 1760 1761val good_vec_def = 1762 Define 1763 `good_vec vec final_states 1764 <=> 1765 (!l c. MEM c ALPHABET ��� MEM l vec ��� c < LENGTH l) /\ 1766 (!n1 c n2 l. n1 < LENGTH vec ��� (EL n1 vec = l) ��� 1767 c < LENGTH (EL n1 vec) ��� 1768 (EL c (EL n1 vec) = SOME n2) ==> n2 < LENGTH vec) /\ 1769 (LENGTH final_states = LENGTH vec)`; 1770 1771val Brz_inv_to_good_vec = Q.prove 1772(`!seen next_state state_map table. 1773 Brz_invariant seen [] (next_state,state_map,table) ��� 1774 (table_to_vectors table = vec) /\ 1775 (accepts_to_vector (get_accepts state_map) next_state = final_states) 1776 ==> 1777 (next_state = LENGTH table) ��� 1778 good_vec vec final_states ��� 1779 (���r n. (lookup regexp_compare r state_map = SOME n) ��� n < LENGTH vec)` 1780 , 1781 simp [good_vec_def] 1782 >> rpt gen_tac >> strip_tac 1783 >> imp_res_tac Brz_invariant_final 1784 >> fs [Brz_invariant_def,invar_def, 1785 ounion_oempty,union_def, set_def, oset_def] 1786 >> 1787 `(LENGTH vec = LENGTH table) ��� 1788 (LENGTH final_states = LENGTH table) ��� 1789 (���n. MEM n (get_accepts state_map) ��� n < LENGTH final_states ��� EL n final_states) ��� 1790 (���n. (���l. ALOOKUP table n = SOME l) ��� n < LENGTH vec) ��� 1791 (!n l. n < LENGTH vec ��� (LENGTH (EL n vec) = alphabet_size)) ��� 1792 (���n c x. 1793 (���l. (ALOOKUP table n = SOME l) ��� (ALOOKUP l c = SOME x)) 1794 <=> 1795 n < LENGTH vec ��� c < LENGTH (EL n vec) ��� 1796 (EL c (EL n vec) = SOME x))` 1797 by 1798 (match_mp_tac table_to_vec_thm 1799 >> rw [sorted_get_accepts] 1800 >> metis_tac [mem_alphabet,alphabet_size_def]) 1801 >> rw [] 1802 >- (fs [MEM_EL] >> rw [] 1803 >> metis_tac [EL_MEM,mem_alphabet,alphabet_size_def]) 1804 >- (`!a. a < LENGTH table <=> MEM a (MAP FST table)` 1805 by metis_tac [MEM_PERM,MEM_COUNT_LIST] 1806 >> rw [fdom_oimage_inst,apply_def, oin_fdom,fapply_def,SYM dom_def] 1807 >> rw [dom_def,fdom_def, member_iff_lookup] 1808 >> metis_tac [good_table_def,THE_DEF]) 1809 >- (`!a. a < LENGTH table <=> MEM a (MAP FST table)` 1810 by metis_tac [MEM_PERM,MEM_COUNT_LIST] 1811 >> rw [fdom_oimage_inst,apply_def, oin_fdom,fapply_def,SYM dom_def] 1812 >> simp_tac set_ss [dom_def,fdom_def, member_iff_lookup] 1813 >> metis_tac [THE_DEF])); 1814 1815val compile_regexp_good_vec = Q.store_thm 1816("compile_regexp_good_vec", 1817`!r state_map table final_states. 1818 dom_Brz empty [normalize r] (1,singleton (normalize r) 0,[]) /\ 1819 (compile_regexp r = (state_map, table, final_states)) 1820 ==> 1821 good_vec table final_states ��� 1822 (!r n. (lookup regexp_compare r state_map = SOME n) ��� n < LENGTH table) ��� 1823 normalize r ��� fdom regexp_compare state_map` 1824 , 1825 simp [compile_regexp_def] 1826 >> rpt gen_tac >> strip_tac 1827 >> fs [] 1828 >> `?seen next_state state_map table. 1829 Brzozowski empty [normalize r] (1,singleton (normalize r) 0,[]) 1830 = (seen,next_state,state_map,table)` 1831 by metis_tac [pair_CASES] 1832 >> fs [] 1833 >> `Brz_invariant empty [normalize r] (1,singleton (normalize r) 0,[])` 1834 by 1835 (fs [Brz_invariant_def,invar_def,dom_def, normalize_thm, fmap_inj_def, good_table_def, 1836 singleton_thm,empty_def,invariant_def,mem_regexp_def,member_iff_lookup] 1837 >> rw [lookup_def] 1838 >- (fs [fdom_def, singleton_def,lookup_def] 1839 >> BasicProvers.EVERY_CASE_TAC 1840 >> rw [] 1841 >> metis_tac [eq_cmp_regexp_compare, eq_cmp_def,normalize_thm]) 1842 >- (rw [union_def,set_def,GSYM empty_def, GSYM oempty_def] >> 1843 rw [EXTENSION,fdom_def, osingleton_def, singleton_def, lookup_def] >> 1844 CASE_TAC) 1845 >- (rw [range_def,frange_def, EXTENSION, singleton_def,lookup_def,EQ_IMP_THM] 1846 >- (BasicProvers.EVERY_CASE_TAC >> rw []) 1847 >- (Q.EXISTS_TAC `normalize r` >> rw [regexp_compare_id] >> decide_tac)) 1848 >- (rw [GSYM empty_def, GSYM oempty_def] 1849 >> rw [fdom_def, lookup_def, empty_def, oempty_def]) 1850 >- (ntac 2 (pop_assum mp_tac) 1851 >> simp_tac set_ss [fdom_def,singleton_def,lookup_def] 1852 >> BasicProvers.EVERY_CASE_TAC 1853 >> rw[] 1854 >> metis_tac [eq_cmp_def, eq_cmp_regexp_compare])) 1855 >> imp_res_tac Brz_inv_thm 1856 >> imp_res_tac Brz_mono 1857 >> imp_res_tac Brz_inv_to_good_vec 1858 >> fs [] 1859 >> rw [] 1860 >- metis_tac [] 1861 >- (`normalize r ��� fdom regexp_compare (singleton (normalize r) 0)` 1862 by rw [submap_def,fdom_def,singleton_def,lookup_def,regexp_compare_id] 1863 >> metis_tac [submap_def]) 1864); 1865 1866 1867val vec_lang_correct = Q.prove 1868(`!table final_states max_char vec final_states'. 1869 (max_char = alphabet_size) /\ 1870 SORTED $<= final_states /\ 1871 (���x. MEM x final_states ��� x < LENGTH table) ��� 1872 (���n l c. (ALOOKUP table n = SOME l) ��� (���x. ALOOKUP l c = SOME x) ��� c < max_char) ��� 1873 (PERM (MAP FST table) (COUNT_LIST (LENGTH table))) ��� 1874 (!n'. n' ��� BIGUNION (IMAGE alist_range (alist_range table)) ��� n' < LENGTH table) ��� 1875 (table_to_vectors table = vec) /\ 1876 (accepts_to_vector final_states (LENGTH table) = final_states') 1877 ��� 1878 !n s. (!c. MEM c s ��� c < max_char) ��� n < LENGTH table 1879 ==> 1880 (table_lang final_states table n s 1881 <=> 1882 exec_dfa (fromList final_states') 1883 (fromList (MAP fromList vec)) 1884 n 1885 (MAP CHR s))` 1886 , 1887 rpt gen_tac 1888 >> strip_tac 1889 >> `(LENGTH vec = LENGTH table) ��� 1890 (LENGTH final_states' = LENGTH table) ��� 1891 (���n. MEM n final_states ��� n < LENGTH final_states' ��� EL n final_states') ��� 1892 (���n. (���l. ALOOKUP table n = SOME l) ��� n < LENGTH vec) ��� 1893 (!n l. n < LENGTH vec ��� (LENGTH (EL n vec) = max_char)) ��� 1894 (���n c x. 1895 (���l. (ALOOKUP table n = SOME l) ��� (ALOOKUP l c = SOME x)) 1896 ��� 1897 n < LENGTH vec ��� c < LENGTH (EL n vec) ��� 1898 (EL c (EL n vec) = SOME x))` 1899 by (match_mp_tac table_to_vec_thm 1900 >> rw [] 1901 >> metis_tac []) 1902 >> Induct_on `s` 1903 >> rw [table_lang_def, exec_dfa_thm,fromList_Vector,sub_def] 1904 >> `h < alphabet_size` by metis_tac [] 1905 >> rw [ORD_CHR_lem] 1906 >> BasicProvers.EVERY_CASE_TAC 1907 >> fs [] 1908 >- metis_tac [NOT_SOME_NONE] 1909 >- (`n < LENGTH (table_to_vectors table)` by metis_tac[] 1910 >> fs [EL_MAP,sub_def] 1911 >> metis_tac [SOME_11, NOT_SOME_NONE]) 1912 >- (`n < LENGTH (table_to_vectors table)` by metis_tac[] 1913 >> fs [EL_MAP,sub_def] 1914 >> metis_tac [NOT_SOME_NONE]) 1915 >- (`n < LENGTH (table_to_vectors table)` by metis_tac[] 1916 >> fs [EL_MAP,sub_def] 1917 >> `x' = x''` by metis_tac [SOME_11, NOT_SOME_NONE] 1918 >> rw [] 1919 >> fs [fromList_Vector] 1920 >> FIRST_X_ASSUM match_mp_tac 1921 >> FIRST_X_ASSUM match_mp_tac 1922 >> rw [PULL_EXISTS] 1923 >> rw [alistTheory.alist_range_def, EXTENSION] 1924 >> metis_tac []) 1925); 1926 1927 1928val Brz_lang_def = Define ` 1929 Brz_lang r = 1930 let (state_map,table_vec,finals_vec) = compile_regexp r in 1931 let tableV = fromList (MAP fromList table_vec) in 1932 let finalsV = fromList finals_vec 1933 in 1934 exec_dfa finalsV tableV (apply state_map (normalize r))`; 1935 1936val string_to_numlist = Q.prove 1937(`!s. (!c. c IN set s ==> ORD c IN set ALPHABET) ==> 1938 ?nl. (s = MAP CHR nl) /\ (!n. n IN set nl ==> n IN set ALPHABET)`, 1939 Induct >> rw [] 1940 >> `���nl. (s = MAP CHR nl) ��� ���n. MEM n nl ��� MEM n ALPHABET` by metis_tac[] 1941 >> `?n. (h = CHR n) /\ n < 256` by metis_tac [CHR_ONTO] 1942 >> Q.EXISTS_TAC `n::nl` 1943 >> rw [] 1944 >> metis_tac [ORD_CHR_RWT,alphabet_size_def]); 1945 1946 1947val Brzozowski_correct = Q.store_thm 1948("Brzozowski_correct", 1949`!r s. 1950 dom_Brz empty [normalize r] (1,singleton (normalize r) 0,[]) /\ 1951 (!c. MEM c s ��� MEM (ORD c) ALPHABET) 1952 ==> 1953 (Brz_lang r s = regexp_lang (normalize r) s)` 1954 , 1955 rw [GSYM regexp_lang_smart_deriv, Brz_lang_def] 1956 >> UNABBREV_ALL_TAC 1957 >> fs [compile_regexp_def] 1958 >> `?seen next_state state_map table. 1959 Brzozowski empty [normalize r] (1,singleton (normalize r) 0,[]) 1960 = (seen,next_state,state_map,table)` 1961 by metis_tac [pair_CASES,SND] 1962 >> fs [LET_THM] 1963 >> rw [] 1964 >> `Brz_invariant empty [normalize r] (1,singleton (normalize r) 0,[])` 1965 by (rw_tac set_ss [Brz_invariant_def,invar_def,dom_def, normalize_thm, 1966 fmap_inj_def, good_table_def, singleton_thm,empty_def, 1967 invariant_def,mem_regexp_def,member_iff_lookup] 1968 >- fs [lookup_def] 1969 >- (fs [fdom_def, singleton_def,lookup_def] 1970 >> BasicProvers.EVERY_CASE_TAC 1971 >> rw [] 1972 >> metis_tac [eq_cmp_regexp_compare, eq_cmp_def,normalize_thm]) 1973 >- (rw [union_def,set_def,GSYM empty_def, GSYM oempty_def] >> 1974 rw [EXTENSION,fdom_def, osingleton_def, singleton_def, lookup_def] >> 1975 CASE_TAC) 1976 >- (rw [range_def,frange_def, EXTENSION, singleton_def,lookup_def,EQ_IMP_THM] 1977 >- (BasicProvers.EVERY_CASE_TAC >> rw []) 1978 >- (Q.EXISTS_TAC `normalize r` >> rw [regexp_compare_id] >> decide_tac)) 1979 >- (rw [GSYM empty_def, GSYM oempty_def] 1980 >> rw [fdom_def, lookup_def, empty_def, oempty_def]) 1981 >- (ntac 2 (pop_assum mp_tac) 1982 >> simp_tac set_ss [fdom_def,singleton_def,lookup_def] 1983 >> BasicProvers.EVERY_CASE_TAC 1984 >> rw[] 1985 >> metis_tac [eq_cmp_def, eq_cmp_regexp_compare]) 1986 >- fs [alistTheory.ALOOKUP_def] 1987 >- fs [alistTheory.ALOOKUP_def] 1988 >- fs [alistTheory.ALOOKUP_def]) 1989 >> imp_res_tac Brz_inv_thm 1990 >> imp_res_tac Brz_mono 1991 >> imp_res_tac Brz_invariant_final 1992 >> fs [Brz_invariant_def] 1993 >> `smart_deriv_matches (normalize r) s 1994 <=> 1995 table_lang (get_accepts state_map) table (apply state_map (normalize r)) (MAP ORD s)` 1996 by (fs [invar_def,set_def,ounion_oempty,union_def] 1997 >> `fdom num_cmp (oimage num_cmp (apply state_map) seen) = frange regexp_compare state_map` 1998 by (simp_tac set_ss [frange_def] >> 1999 qpat_x_assum `dom _ = dom __` mp_tac >> 2000 rw [fdom_oimage_inst,apply_def, oin_fdom,fapply_def, dom_def] >> 2001 simp_tac set_ss [fdom_def] >> metis_tac[THE_DEF]) 2002 >> mp_tac (table_lang_correct 2003 |> INST_TYPE [alpha |-> ``:num``] 2004 |> Q.SPEC `get_accepts state_map` 2005 |> Q.SPEC `table:(num,(num,num)alist)alist` 2006 |> Q.SPEC `state_map:(regexp,num)balanced_map`) 2007 >> fs[] 2008 >> `(���n r. (lookup regexp_compare r state_map = SOME n) ��� 2009 (MEM n (get_accepts state_map) ��� nullable r))` 2010 by (rw [mem_get_accepts,EQ_IMP_THM] 2011 >- (`r' IN fdom regexp_compare state_map` by rw [fdom_def] 2012 >> metis_tac [fmap_inj_def, eq_cmp_regexp_compare, eq_cmp_def]) 2013 >- metis_tac[]) 2014 >> rw [] 2015 >> pop_assum mp_tac 2016 >> imp_res_tac string_to_numlist 2017 >> DISCH_THEN (assume_tac o Q.SPECL [`0n`, `normalize r`, `nl`]) 2018 >> rfs[] 2019 >> rw[] 2020 >> `normalize r IN fdom regexp_compare (singleton (normalize r) 0)` 2021 by (rw [fdom_def, singleton_def,lookup_def,EQ_IMP_THM] 2022 >> BasicProvers.EVERY_CASE_TAC 2023 >> fs [regexp_compare_id]) 2024 >> `normalize r IN fdom regexp_compare state_map /\ 2025 (lookup regexp_compare (normalize r) (singleton (normalize r) 0) = 2026 lookup regexp_compare (normalize r) state_map)` 2027 by metis_tac [submap_def] 2028 >> fs [singleton_def,lookup_def,regexp_compare_id] 2029 >> qpat_x_assum `table_lang (get_accepts state_map) table 0 nl ��� 2030 smart_deriv_matches (normalize r) (MAP CHR nl)` 2031 (SUBST_ALL_TAC o SYM) 2032 >> rw [MAP_MAP_o,combinTheory.o_DEF,apply_def, fapply_def] 2033 >> pop_assum (SUBST_ALL_TAC o SYM) 2034 >> rw [] 2035 >> `MAP (��x. ORD (CHR x)) nl = MAP (\x.x) nl` 2036 by (irule MAP_CONG >> rw [] >> metis_tac [mem_alphabet,ORD_CHR_lem]) 2037 >> rw[]) 2038 >> rw [] 2039 >> imp_res_tac string_to_numlist 2040 >> rw [MAP_MAP_o,combinTheory.o_DEF,apply_def, fapply_def] 2041 >> `MAP (��x. ORD (CHR x)) nl = MAP (\x.x) nl` 2042 by (irule MAP_CONG >> rw [] >> metis_tac [mem_alphabet,ORD_CHR_lem]) 2043 >> rw [] 2044 >> match_mp_tac (GSYM (SIMP_RULE (srw_ss()) [PULL_FORALL, AND_IMP_INTRO] vec_lang_correct)) 2045 >> rw [] 2046 >- rw [sorted_get_accepts] 2047 >- metis_tac [mem_alphabet] 2048 >- (fs [good_table_def, alistTheory.alist_range_def] 2049 >> res_tac 2050 >> `n'' IN range state_map` 2051 by (simp_tac set_ss [range_def, frange_def] >> metis_tac[]) 2052 >> `n'' IN count (LENGTH table)` by metis_tac [EXTENSION] 2053 >> metis_tac [IN_COUNT]) 2054 >- metis_tac [mem_alphabet] 2055 >- (`normalize r IN fdom regexp_compare (singleton (normalize r) 0)` 2056 by (rw [fdom_def, singleton_def,lookup_def,EQ_IMP_THM] 2057 >> BasicProvers.EVERY_CASE_TAC 2058 >> fs [regexp_compare_id]) 2059 >> `normalize r IN fdom regexp_compare state_map /\ 2060 (lookup regexp_compare (normalize r) state_map = 2061 lookup regexp_compare (normalize r) (singleton (normalize r) 0))` 2062 by metis_tac [submap_def] 2063 >> rw [singleton_def,lookup_def,regexp_compare_id] 2064 >> `0n IN range state_map` 2065 by (simp_tac set_ss [range_def, frange_def] 2066 >> Q.EXISTS_TAC `normalize r` 2067 >> rw [singleton_def,lookup_def,EQ_IMP_THM] 2068 >> CASE_TAC 2069 >> fs [regexp_compare_id]) 2070 >> `0n IN count (LENGTH table)` by metis_tac [EXTENSION] 2071 >> metis_tac [IN_COUNT]) 2072); 2073 2074val Brzozowski_correct_again = Q.store_thm 2075("Brzozowski_correct", 2076`!r s. 2077 dom_Brz empty [normalize r] (1,singleton (normalize r) 0,[]) /\ 2078 (!c. MEM c s ��� MEM (ORD c) ALPHABET) 2079 ==> 2080 (regexp_matcher r s = regexp_lang r s)` 2081 , 2082 rw [] 2083 >> `Brz_lang r s = regexp_lang r s` 2084 by metis_tac [regexp_lang_normalize,Brzozowski_correct] 2085 >> pop_assum (SUBST_ALL_TAC o SYM) 2086 >> rw [Brz_lang_def, regexp_matcher_def, LET_THM,apply_def,fapply_def]); 2087 2088val Brzozowski_partial_eval = Q.store_thm 2089("Brzozowski_partial_eval", 2090`!r state_numbering delta accepts deltaV acceptsV start_state. 2091 (compile_regexp r = (state_numbering,delta,accepts)) /\ 2092 (deltaV = fromList (MAP fromList delta)) /\ 2093 (acceptsV = fromList accepts) /\ 2094 (lookup regexp_compare (normalize r) state_numbering = SOME start_state) /\ 2095 dom_Brz empty [normalize r] (1,singleton (normalize r) 0,[]) 2096 ==> !s. EVERY (\c. MEM (ORD c) ALPHABET) s 2097 ==> 2098 (exec_dfa acceptsV deltaV start_state s = regexp_lang r s)` 2099 , 2100 rw [EVERY_MEM] 2101 >> `regexp_matcher r s = regexp_lang r s` 2102 by metis_tac [Brzozowski_correct_again] 2103 >> pop_assum (SUBST_ALL_TAC o SYM) 2104 >> rw [regexp_matcher_def, LET_THM]); 2105 2106val all_ord_string = Q.prove 2107(`EVERY (\c. MEM (ORD c) ALPHABET) s 2108 <=> 2109 EVERY (\c. ORD c < alphabet_size) s`, 2110 simp_tac list_ss [mem_alphabet_iff]); 2111 2112val Brzozowski_partial_eval_conseq = Q.store_thm 2113("Brzozowski_partial_eval_conseq", 2114 `!r state_numbering delta accepts deltaV acceptsV start_state. 2115 (compile_regexp r = (state_numbering,delta,accepts)) /\ 2116 (deltaV = fromList (MAP fromList delta)) /\ 2117 (acceptsV = fromList accepts) /\ 2118 (lookup regexp_compare (normalize r) state_numbering = SOME start_state) /\ 2119 dom_Brz_alt empty [normalize r] 2120 ==> 2121 !s. EVERY (��c. ORD c < alphabet_size) s 2122 ==> 2123 (exec_dfa acceptsV deltaV start_state s = regexp_lang r s)` 2124 , 2125 metis_tac[Brzozowski_partial_eval,all_ord_string,dom_Brz_alt_equal,all_ord_string]); 2126 2127 2128(*---------------------------------------------------------------------------*) 2129(* Eliminate check that all chars in string are in alphabet. This can be *) 2130(* be done when alphabet = [0..255] *) 2131(*---------------------------------------------------------------------------*) 2132 2133val Brzozowski_partial_eval_256 = save_thm 2134("Brzozowski_partial_eval_256", 2135 if Regexp_Type.alphabet_size = 256 then 2136 SIMP_RULE list_ss [ORD_BOUND,alphabet_size_def] Brzozowski_partial_eval_conseq 2137 else TRUTH); 2138 2139 2140(* val _ = EmitTeX.tex_theory"-"; *) 2141 2142val _ = export_theory(); 2143 2144