1(*---------------------------------------------------------------------------*)
2(* Set up context                                                            *)
3(*---------------------------------------------------------------------------*)
4
5open HolKernel Parse boolLib bossLib numLib;
6open optionTheory pairTheory relationTheory arithmeticTheory
7     pred_setTheory bagTheory containerTheory
8     listTheory rich_listTheory stringTheory sortingTheory mergesortTheory
9     comparisonTheory balanced_mapTheory regexp_mapTheory osetTheory
10     finite_mapTheory vec_mapTheory charsetTheory regexpTheory
11;
12
13
14val _ = numLib.prefer_num();
15
16fun pat_elim q = Q.PAT_X_ASSUM q (K ALL_TAC);
17
18val byA = BasicProvers.byA;
19infix byA;
20
21val subst_all_tac = SUBST_ALL_TAC;
22val simp_rule = SIMP_RULE;
23
24val SET_EQ_THM = FUN_EQ_THM;
25
26val list_ss = list_ss ++ rewrites [LENGTH_NIL, LENGTH_NIL_SYM];
27
28val set_ss = list_ss ++ pred_setLib.PRED_SET_ss ++ rewrites [SET_EQ_THM,IN_DEF]
29
30val comparison_distinct = TypeBase.distinct_of ``:ordering``
31
32(*---------------------------------------------------------------------------*)
33(* Trivial lemmas                                                            *)
34(*---------------------------------------------------------------------------*)
35
36Triviality INTER_DELETE :
37  !A a. A INTER (A DELETE a) = A DELETE a
38Proof
39  SET_TAC []
40QED
41
42Triviality IN_SET_UNION :
43 !x s t.
44   x IN s ==> (s UNION t = s UNION (x INSERT t))
45Proof
46 SET_TAC []
47QED
48
49Triviality leq_thm :
50  transitive (<=) /\ total (<=) /\ antisymmetric (<=)
51Proof
52   srw_tac [ARITH_ss] [transitive_def, total_def, antisymmetric_def]
53QED
54
55Theorem MAX_LE_THM[local] :
56  !m n. m <= MAX m n /\ n <= MAX m n
57Proof
58  RW_TAC arith_ss [MAX_DEF]
59QED
60
61Theorem IS_SOME_EXISTS[local] :
62  !x. IS_SOME x = ?y. x = SOME y
63Proof
64  Cases THEN METIS_TAC [optionTheory.IS_SOME_DEF]
65QED
66
67Triviality length_mergesort :
68  !l R. LENGTH (mergesort R l) = LENGTH l
69Proof
70  metis_tac[mergesort_perm,PERM_LENGTH]
71QED
72
73Triviality LENGTH_NIL_SYM =
74   GEN_ALL (CONV_RULE (LHS_CONV SYM_CONV) (SPEC_ALL LENGTH_NIL));
75
76Theorem string_to_numlist[local] :
77 !s. (!c. c IN set s ==> ORD c IN set ALPHABET)
78     ==>
79      ?nl. (s = MAP CHR nl) /\
80           (!n. n IN set nl ==> n IN set ALPHABET)
81Proof
82 Induct >> rw []
83  >> `?nl. (s = MAP CHR nl) /\ !n. MEM n nl ==> MEM n ALPHABET` by metis_tac[]
84  >> `?n. (h = CHR n) /\ n < 256` by metis_tac [CHR_ONTO]
85  >> Q.EXISTS_TAC `n::nl`
86  >> rw []
87  >> metis_tac [ORD_CHR_RWT,alphabet_size_def]
88QED
89
90Theorem all_ord_string[local] :
91 EVERY (\c. MEM (ORD c) ALPHABET) s
92   <=>
93  EVERY (\c. ORD c < alphabet_size) s
94Proof
95 simp_tac list_ss [mem_alphabet_iff]
96QED
97
98(*---------------------------------------------------------------------------*)
99(* Let's get started                                                         *)
100(*---------------------------------------------------------------------------*)
101
102val _ = new_theory "regexp_compiler";
103
104(*---------------------------------------------------------------------------*)
105(* Output of the compiler is in terms of vectors.                            *)
106(*---------------------------------------------------------------------------*)
107
108val _ = Hol_datatype `vector = Vector of 'a list`;
109
110Definition fromList_def : fromList l = Vector l
111End
112
113Definition sub_def : sub (Vector l) n = EL n l
114End
115
116Definition length_def : length (Vector l) = LENGTH l
117End
118
119Theorem fromList_Vector = SIMP_RULE list_ss [GSYM FUN_EQ_THM] fromList_def;
120
121(*---------------------------------------------------------------------------*)
122(* Operations on the set of seen states                                      *)
123(*---------------------------------------------------------------------------*)
124
125val _ = Parse.type_abbrev("regexp_set", ``:(regexp,unit)balanced_map``);
126
127Definition insert_regexp_def :
128  insert_regexp r seen =
129      balanced_map$insert regexp_compare r () seen
130End
131
132Definition mem_regexp_def :
133  mem_regexp r seen =
134    balanced_map$member regexp_compare r seen
135End
136
137(*---------------------------------------------------------------------------*)
138(* Transitions out of a state (regexp).                                      *)
139(*---------------------------------------------------------------------------*)
140
141Definition transitions_def :
142  transitions r = MAP (\c. (c,smart_deriv c r)) ALPHABET
143End
144
145(*---------------------------------------------------------------------------*)
146(* Special case for Empty (failure state)                                    *)
147(*---------------------------------------------------------------------------*)
148
149Definition Empty_arcs_def :
150 Empty_arcs =
151  [(0,Empty);  (1,Empty);  (2,Empty);  (3,Empty);
152   (4,Empty);  (5,Empty);  (6,Empty);  (7,Empty);
153   (8,Empty);  (9,Empty);  (10,Empty); (11,Empty);
154   (12,Empty); (13,Empty); (14,Empty); (15,Empty);
155   (16,Empty); (17,Empty); (18,Empty); (19,Empty);
156   (20,Empty); (21,Empty); (22,Empty); (23,Empty);
157   (24,Empty); (25,Empty); (26,Empty); (27,Empty);
158   (28,Empty); (29,Empty); (30,Empty); (31,Empty);
159   (32,Empty); (33,Empty); (34,Empty); (35,Empty);
160   (36,Empty); (37,Empty); (38,Empty); (39,Empty);
161   (40,Empty); (41,Empty); (42,Empty); (43,Empty);
162   (44,Empty); (45,Empty); (46,Empty); (47,Empty);
163   (48,Empty); (49,Empty); (50,Empty); (51,Empty);
164   (52,Empty); (53,Empty); (54,Empty); (55,Empty);
165   (56,Empty); (57,Empty); (58,Empty); (59,Empty);
166   (60,Empty); (61,Empty); (62,Empty); (63,Empty);
167   (64,Empty); (65,Empty); (66,Empty); (67,Empty);
168   (68,Empty); (69,Empty); (70,Empty); (71,Empty);
169   (72,Empty); (73,Empty); (74,Empty); (75,Empty);
170   (76,Empty); (77,Empty); (78,Empty); (79,Empty);
171   (80,Empty); (81,Empty); (82,Empty); (83,Empty);
172   (84,Empty); (85,Empty); (86,Empty); (87,Empty);
173   (88,Empty); (89,Empty); (90,Empty); (91,Empty);
174   (92,Empty); (93,Empty); (94,Empty); (95,Empty);
175   (96,Empty); (97,Empty); (98,Empty); (99,Empty);
176   (100,Empty); (101,Empty) ; (102,Empty); (103,Empty);
177   (104,Empty); (105,Empty);
178   (106,Empty); (107,Empty); (108,Empty); (109,Empty);
179   (110,Empty); (111,Empty); (112,Empty); (113,Empty);
180   (114,Empty); (115,Empty); (116,Empty); (117,Empty);
181   (118,Empty); (119,Empty); (120,Empty); (121,Empty);
182   (122,Empty); (123,Empty); (124,Empty); (125,Empty);
183   (126,Empty); (127,Empty); (128,Empty); (129,Empty);
184   (130,Empty); (131,Empty); (132,Empty); (133,Empty);
185   (134,Empty); (135,Empty); (136,Empty); (137,Empty);
186   (138,Empty); (139,Empty); (140,Empty); (141,Empty);
187   (142,Empty); (143,Empty); (144,Empty); (145,Empty);
188   (146,Empty); (147,Empty); (148,Empty); (149,Empty);
189   (150,Empty); (151,Empty); (152,Empty); (153,Empty);
190   (154,Empty); (155,Empty); (156,Empty); (157,Empty);
191   (158,Empty); (159,Empty); (160,Empty); (161,Empty);
192   (162,Empty); (163,Empty); (164,Empty); (165,Empty);
193   (166,Empty); (167,Empty); (168,Empty); (169,Empty);
194   (170,Empty); (171,Empty); (172,Empty); (173,Empty);
195   (174,Empty); (175,Empty); (176,Empty); (177,Empty);
196   (178,Empty); (179,Empty); (180,Empty); (181,Empty);
197   (182,Empty); (183,Empty); (184,Empty); (185,Empty);
198   (186,Empty); (187,Empty); (188,Empty); (189,Empty);
199   (190,Empty); (191,Empty); (192,Empty); (193,Empty);
200   (194,Empty); (195,Empty); (196,Empty); (197,Empty);
201   (198,Empty); (199,Empty); (200,Empty); (201,Empty);
202   (202,Empty); (203,Empty); (204,Empty); (205,Empty);
203   (206,Empty); (207,Empty); (208,Empty); (209,Empty);
204   (210,Empty); (211,Empty); (212,Empty); (213,Empty);
205   (214,Empty); (215,Empty); (216,Empty); (217,Empty);
206   (218,Empty); (219,Empty); (220,Empty); (221,Empty);
207   (222,Empty); (223,Empty); (224,Empty); (225,Empty);
208   (226,Empty); (227,Empty); (228,Empty); (229,Empty);
209   (230,Empty); (231,Empty); (232,Empty); (233,Empty);
210   (234,Empty); (235,Empty); (236,Empty); (237,Empty);
211   (238,Empty); (239,Empty); (240,Empty); (241,Empty);
212   (242,Empty); (243,Empty); (244,Empty); (245,Empty);
213   (246,Empty); (247,Empty); (248,Empty); (249,Empty);
214   (250,Empty); (251,Empty); (252,Empty); (253,Empty);
215   (254,Empty); (255,Empty)]
216End;
217
218Theorem Empty_arcs_thm :
219  MAP (\c. (c,smart_deriv c Empty)) ALPHABET = Empty_arcs
220Proof
221  rw_tac (srw_ss())
222    [ALPHABET_def, Empty_arcs_def, smart_deriv_def,
223     Empty_def,Epsilon_def,charset_mem_empty]
224QED
225
226Theorem transitions_thm :
227  transitions r = if r = Empty then Empty_arcs
228                  else MAP (\c. (c,smart_deriv c r)) ALPHABET
229Proof
230  rw_tac list_ss [transitions_def] >> metis_tac [Empty_arcs_thm]
231QED
232
233(*---------------------------------------------------------------------------*)
234(* Given the outgoing arcs (c_1,r_1) ... (c_n,r_n) from node r, add all the  *)
235(* not-yet-seen r_i regexps to the state map and return the row as a list of *)
236(* (c_i, state_map(r_i)) pairs                                               *)
237(*---------------------------------------------------------------------------*)
238
239Definition extend_states_def :
240 (extend_states next_state state_map row [] = (next_state,state_map,row)) /\
241 (extend_states next_state state_map row ((c,r')::t) =
242   case lookup regexp_compare r' state_map
243    of SOME n => extend_states next_state state_map ((c,n)::row) t
244     | NONE   => extend_states
245                   (next_state + 1n)
246                   (insert regexp_compare r' next_state state_map)
247                   ((c,next_state)::row)
248                   t)
249End
250
251(*---------------------------------------------------------------------------*)
252(* Compute the row corresponding to state r and cons it on to the table      *)
253(* under construction. Also keep the state_map uptodate.                     *)
254(*---------------------------------------------------------------------------*)
255
256Definition build_table_def :
257 build_table arcs r (next_state,state_map,table) =
258   let (next_state',state_map',row) =
259             extend_states next_state state_map [] arcs
260   in case lookup regexp_compare r state_map'
261       of SOME n => (next_state', state_map', (n,row)::table)
262        | NONE   => (next_state' + 1n,
263                     insert regexp_compare r next_state' state_map',
264                     (next_state',row)::table)
265End
266
267(*---------------------------------------------------------------------------*)
268(* The core regexp compiler. The seen argument holds the already-seen        *)
269(* regexps, which map to states in the final DFA. The n argument is a        *)
270(* step-index used to ensure that the function terminates.                   *)
271(*---------------------------------------------------------------------------*)
272
273Definition Brz_def :
274  Brz seen work acc n =
275     if n <= 0n then
276       NONE
277     else
278     if null work then
279       SOME (seen,acc)
280     else
281         let (m,t) = deleteFindMin work;
282             r = FST m;
283         in if mem_regexp r seen then
284               Brz seen t acc (n-1)
285            else
286             let arcs = transitions r
287             in Brz (insert_regexp r seen)
288                    (FOLDL (\work a. insert_regexp (SND a) work) t arcs)
289                    (build_table arcs r acc)
290                    (n-1)
291End
292
293Definition MAXNUM_32_def :
294  MAXNUM_32 = 2147483647n
295End
296
297(*---------------------------------------------------------------------------*)
298(* Build Greve-style Brz function                                            *)
299(*---------------------------------------------------------------------------*)
300
301(*---------------------------------------------------------------------------*)
302(* Domain of the function.                                                   *)
303(*---------------------------------------------------------------------------*)
304
305Definition dom_Brz_def :
306  dom_Brz seen work acc = ?d. IS_SOME(Brz seen work acc d)
307End
308
309(*---------------------------------------------------------------------------*)
310(* Create measure function rdepth. Uses following code copied from           *)
311(*                                                                           *)
312(*       HOLDIR/src/pfl/defchoose                                            *)
313(*                                                                           *)
314(*---------------------------------------------------------------------------*)
315
316(*---------------------------------------------------------------------------*)
317(*  MINCHOOSE (sname, cname,``!x1 ... xn. ?z. M[x1,...,xn,z]``)              *)
318(*   returns                                                                 *)
319(*                                                                           *)
320(*    |- !x1...xn z. M[x1,...,xn,z] ==>                                      *)
321(*                   M[x1,...,xn,cname x1...xn] /\                           *)
322(*                   !m. M[x1,...,xn,m] ==> cname x1...xn <= m               *)
323(*                                                                           *)
324(* where cname in the theorem is a constant. This theorem is stored in the   *)
325(* current theory under sname. Thus this rule is a form of the               *)
326(* well-ordering property.                                                   *)
327(*---------------------------------------------------------------------------*)
328
329Theorem WOP_ALT[local] :
330 !P. (?n. P n) ==> ?min. P min /\ !k. P k ==> min <= k
331Proof
332 METIS_TAC [arithmeticTheory.WOP,DECIDE ``~(m<n) ==> n<=m``]
333QED
334
335fun MINCHOOSE (store_name,const_name,tm) =
336 let val (V,body) = strip_forall tm
337     val P = snd(dest_comb body)
338     val wop_thm = BETA_RULE(SPEC P WOP_ALT)
339     val min_term = snd(dest_imp (concl wop_thm))
340     val min_term_pred = snd(dest_comb min_term)
341     val th1 = BETA_RULE(GSYM (ISPECL [body,min_term_pred] RIGHT_EXISTS_IMP_THM))
342     val th2 = EQ_MP th1 wop_thm
343     val th3 = GENL V th2
344     val th4 = Ho_Rewrite.REWRITE_RULE[SKOLEM_THM] th3
345     val th5 = Ho_Rewrite.REWRITE_RULE[GSYM LEFT_FORALL_IMP_THM] th4
346 in
347  new_specification(store_name, [const_name], th5)
348 end
349 handle e => raise wrap_exn "" "MINCHOOSE" e;
350
351val rdepth_thm =
352   MINCHOOSE ("rdepth_thm", "rdepth",
353    ``!seen work acc. ?d. IS_SOME(Brz seen work acc d)``)
354
355(*---------------------------------------------------------------------------*)
356(* Define Brzozo                                                             *)
357(*---------------------------------------------------------------------------*)
358
359Definition Brzozo_def :
360  Brzozo seen work acc =
361      THE (Brz seen work acc (rdepth seen work acc))
362End
363
364
365(*---------------------------------------------------------------------------*)
366(* Lemmas about Brz and definedness                                          *)
367(*---------------------------------------------------------------------------*)
368
369Theorem IS_SOME_Brz[local] :
370 !d seen work acc.
371    IS_SOME (Brz seen work acc d) ==> d <> 0
372Proof
373 Cases >> rw[Once Brz_def]
374QED
375
376Theorem Brz_SOME[local] :
377 !d seen work acc res.
378   (Brz seen work acc d = SOME res) ==> d <> 0
379Proof
380 METIS_TAC [IS_SOME_Brz,IS_SOME_EXISTS]
381QED
382
383Theorem Brz_dlem[local] :
384 !d seen work acc.
385   IS_SOME (Brz seen work acc d)
386   ==>
387   (Brz seen work acc d = Brz seen work acc (SUC d))
388Proof
389 Ho_Rewrite.REWRITE_TAC [IS_SOME_EXISTS,GSYM LEFT_FORALL_IMP_THM]
390 >> Induct
391    >- metis_tac [Brz_SOME]
392    >- (rw_tac bool_ss []
393        >> pop_assum (mp_tac o SYM)
394        >> ONCE_REWRITE_TAC [Brz_def]
395        >> rw_tac list_ss [LET_THM]
396        >> Cases_on `(deleteFindMin work)`
397        >> BasicProvers.NORM_TAC list_ss []
398        >> fs[])
399QED
400
401Theorem Brz_monotone[local] :
402 !d1 d2 seen work acc.
403      IS_SOME(Brz seen work acc d1) /\ d1 <= d2
404       ==> (Brz seen work acc d1 = Brz seen work acc d2)
405Proof
406 RW_TAC arith_ss [LESS_EQ_EXISTS] THEN
407 Induct_on `p` THEN METIS_TAC [ADD_CLAUSES,Brz_dlem]
408QED
409
410Theorem Brz_norm[local] :
411 !d seen work acc.
412   IS_SOME(Brz seen work acc d)
413    ==>
414   (Brz seen work acc d = Brz seen work acc (rdepth seen work acc))
415Proof
416  METIS_TAC [Brz_monotone,rdepth_thm]
417QED
418
419Theorem Brz_determ :
420 !d1 d2 seen work acc.
421    IS_SOME(Brz seen work acc d1) /\ IS_SOME(Brz seen work acc d2)
422       ==> (Brz seen work acc d1 = Brz seen work acc d2)
423Proof
424  METIS_TAC [Brz_norm]
425QED
426
427(*---------------------------------------------------------------------------*)
428(* Derive eqns for dom_Brz                                                   *)
429(*---------------------------------------------------------------------------*)
430
431Theorem lem[local] :
432 !seen work acc.
433   null work ==> IS_SOME (Brz seen work acc 1)
434Proof
435 RW_TAC arith_ss [Once Brz_def]
436QED
437
438Theorem dom_base_case[local] :
439 !seen work acc.
440   null work ==> dom_Brz seen work acc
441Proof
442 METIS_TAC [dom_Brz_def, lem]
443QED
444
445Theorem step2_lem1a[local] :
446 !seen work acc r x t.
447   ~null work /\
448   (deleteFindMin work = ((r,x),t)) /\
449   mem_regexp r seen /\ dom_Brz seen work acc
450    ==> dom_Brz seen t acc
451Proof
452 rw_tac std_ss [dom_Brz_def]
453  >> `d<>0` by metis_tac [IS_SOME_Brz]
454  >> qexists_tac `d-1`
455  >> qpat_x_assum `IS_SOME arg` (mp_tac o ONCE_REWRITE_RULE [Brz_def])
456  >> rw_tac arith_ss [LET_THM]
457QED
458
459Theorem step2_lem1b[local] :
460 !seen work acc r x t arcs.
461   ~null work /\
462   (deleteFindMin work = ((r,x),t)) /\
463   ~mem_regexp r seen /\
464   (arcs = transitions r) /\
465   dom_Brz seen work acc
466    ==> dom_Brz (insert_regexp r seen)
467                (FOLDL (\work a. insert_regexp (SND a) work) t arcs)
468                (build_table arcs r acc)
469Proof
470 rw_tac std_ss [dom_Brz_def]
471  >> `d<>0` by metis_tac [IS_SOME_Brz]
472  >> qexists_tac `d-1`
473  >> qpat_x_assum `IS_SOME arg` (mp_tac o ONCE_REWRITE_RULE [Brz_def])
474  >> rw_tac arith_ss [LET_THM]
475QED
476
477Theorem step2_lem2a[local] :
478 !seen work acc r x t.
479   ~null work /\
480   (deleteFindMin work = ((r,x),t)) /\
481   mem_regexp r seen /\ dom_Brz seen t acc
482   ==> dom_Brz seen work acc
483Proof
484 rw_tac std_ss [dom_Brz_def]
485   >> Q.EXISTS_TAC `SUC d`
486   >> rw [Once Brz_def]
487QED
488
489Theorem step2_lem2b[local] :
490 !seen work acc r x t arcs.
491   ~null work /\
492   (deleteFindMin work = ((r,x),t)) /\
493   (arcs = transitions r) /\
494  ~mem_regexp r seen /\
495  dom_Brz (insert_regexp r seen)
496          (FOLDL (\work a. insert_regexp (SND a) work) t arcs)
497          (build_table arcs r acc)
498   ==> dom_Brz seen work acc
499Proof
500 rw_tac std_ss [dom_Brz_def]
501   >> Q.EXISTS_TAC `SUC d`
502   >> rw [Once Brz_def,LET_THM]
503QED
504
505
506(*---------------------------------------------------------------------------*)
507(* Equational characterization of dom.                                       *)
508(*---------------------------------------------------------------------------*)
509
510Theorem dom_Brz_eqns :
511 dom_Brz seen work acc =
512    if null work then
513      T
514    else let (m,t) = deleteFindMin work ;
515             r = FST m;
516         in
517           if mem_regexp r seen then
518               dom_Brz seen t acc
519           else let arcs = transitions r
520                in dom_Brz (insert_regexp r seen)
521                           (FOLDL (\work a. insert_regexp (SND a) work) t arcs)
522                           (build_table arcs r acc)
523Proof
524 Cases_on `null work`
525  >- metis_tac[dom_base_case]
526  >- (asm_simp_tac list_ss []
527       >> Cases_on `deleteFindMin work`
528       >> rw []
529       >> Cases_on `q`
530       >> fs[]
531          >- metis_tac [step2_lem1a,step2_lem2a]
532          >- metis_tac [step2_lem1b,step2_lem2b])
533QED
534
535(*---------------------------------------------------------------------------*)
536(* Optimization: dom_Brz doesn't depend on its "acc" argument, so can be     *)
537(* replaced by a version that doesn't have it.                               *)
538(*---------------------------------------------------------------------------*)
539
540Definition dom_Brz_alt_def :
541  dom_Brz_alt seen work = dom_Brz seen work ARB
542End
543
544Theorem acc_irrelevant[local] :
545 !n seen work acc acc1.
546     IS_SOME (Brz seen work acc n) ==> IS_SOME (Brz seen work acc1 n)
547Proof
548 Induct
549   >- rw [Once Brz_def]
550   >- (ONCE_REWRITE_TAC [Brz_def]
551        >> rw_tac list_ss [LET_THM]
552        >> Cases_on `(deleteFindMin work)`
553        >> fs []
554        >> metis_tac[])
555QED
556
557Theorem dom_Brz_alt_equal :
558 !seen work acc.
559     dom_Brz seen work acc = dom_Brz_alt seen work
560Proof
561  rw [dom_Brz_def, dom_Brz_alt_def] >> metis_tac [acc_irrelevant]
562QED
563
564Theorem dom_Brz_alt_eqns :
565 dom_Brz_alt seen work =
566    if null work then
567      T
568    else let (m,t) = deleteFindMin work ;
569             r = FST m;
570         in
571           if mem_regexp r seen then
572               dom_Brz_alt seen t
573           else let arcs = transitions r
574                in dom_Brz_alt (insert_regexp r seen)
575                           (FOLDL (\work a. insert_regexp (SND a) work) t arcs)
576Proof
577 metis_tac [SIMP_RULE bool_ss [dom_Brz_alt_equal] dom_Brz_eqns]
578QED
579
580(*---------------------------------------------------------------------------*)
581(* Recursion equations for Brzozo                                            *)
582(*---------------------------------------------------------------------------*)
583
584Theorem Brzozo_base[local] :
585 !seen work acc.
586    dom_Brz seen work acc /\ null work
587    ==>
588   (Brzozo seen work acc = (seen,acc))
589Proof
590 rw_tac std_ss [Brzozo_def,dom_Brz_def]
591  >> `rdepth seen work acc <> 0`
592       by METIS_TAC [IS_SOME_Brz,rdepth_thm]
593  >> rw_tac arith_ss [Once Brz_def]
594QED
595
596Theorem Brzozo_rec1[local] :
597 !seen work accr r x t.
598     ~null work /\
599     (deleteFindMin work = ((r,x),t)) /\
600      dom_Brz seen work acc /\
601      mem_regexp r seen
602     ==>
603      (Brzozo seen work acc = Brzozo seen t acc)
604Proof
605 rw_tac std_ss [Brzozo_def,dom_Brz_def]
606  >> `d <> 0` by METIS_TAC [IS_SOME_Brz]
607  >> `Brz seen work acc d = Brz seen t acc (d-1)`
608      by rw_tac list_ss [SimpLHS,Once Brz_def,LET_THM]
609  >> metis_tac [IS_SOME_EXISTS,NOT_SOME_NONE,THE_DEF,Brz_norm]
610QED
611
612Theorem Brzozo_rec2[local] :
613 !seen work accr r x t.
614    ~null work /\
615    (deleteFindMin work = ((r,x),t)) /\
616    dom_Brz seen work acc /\
617    ~mem_regexp r seen
618    ==>
619    (Brzozo seen work acc =
620       let arcs = transitions r
621       in Brzozo (insert_regexp r seen)
622                 (FOLDL (\work a. insert_regexp (SND a) work) t arcs)
623                 (build_table arcs r acc))
624Proof
625 rw_tac std_ss [Brzozo_def,dom_Brz_def,LET_THM]
626  >> `d <> 0` by METIS_TAC [IS_SOME_Brz]
627  >> `Brz seen work acc d =
628      let arcs = transitions r
629      in Brz (insert_regexp r seen)
630          (FOLDL (\work a. insert_regexp (SND a) work) t arcs)
631          (build_table arcs r acc) (d-1)`
632      by rw_tac list_ss [SimpLHS,Once Brz_def,LET_THM]
633  >> METIS_TAC [IS_SOME_EXISTS,NOT_SOME_NONE,THE_DEF,Brz_norm,LET_THM]
634QED
635
636(*---------------------------------------------------------------------------*)
637(* Equational characterization of Brzozo.                                    *)
638(*---------------------------------------------------------------------------*)
639
640Theorem Brzozo_eqns[local] :
641 !seen work acc.
642    dom_Brz seen work acc
643    ==>
644     Brzozo seen work acc =
645       if null work then
646          (seen,acc)
647       else
648         let (rp,t) = deleteFindMin work;
649                  r = FST rp;
650         in
651           if mem_regexp r seen then
652              Brzozo seen t acc
653          else
654            let arcs = transitions r
655            in
656             Brzozo (insert_regexp r seen)
657                    (FOLDL (\work a. insert_regexp (SND a) work) t arcs)
658                    (build_table arcs r acc)
659Proof
660 rw_tac list_ss [LET_THM]
661 >- metis_tac [Brzozo_base]
662 >- (Cases_on `deleteFindMin work`
663      >> Cases_on `q`
664      >> fs[]
665      >> metis_tac [Brzozo_rec1,Brzozo_rec2])
666QED
667
668(*---------------------------------------------------------------------------*)
669(* Now prove induction theorem. This is based on using rdepth as a measure   *)
670(* on the recursion. Thus we first have some lemmas about how rdepth         *)
671(* decreases in recursive calls.                                             *)
672(*---------------------------------------------------------------------------*)
673
674Theorem step3a_lt[local] :
675 !seen work acc r x t.
676   ~null work /\
677   (deleteFindMin work = ((r,x),t)) /\
678   mem_regexp r seen /\
679    dom_Brz seen work acc
680    ==> rdepth seen t acc  < rdepth seen work acc
681Proof
682 rw_tac std_ss [dom_Brz_def] THEN IMP_RES_TAC rdepth_thm THEN
683   `rdepth seen work acc <> 0` by METIS_TAC [IS_SOME_Brz] THEN
684   `rdepth seen work acc - 1 < rdepth seen work acc` by DECIDE_TAC THEN
685   `IS_SOME (Brz seen t acc (rdepth seen work acc - 1))`
686     by (qpat_x_assum `IS_SOME (Brz seen work acc (rdepth seen work acc))`
687              (mp_tac o SIMP_RULE arith_ss [Once Brz_def])
688         >> rw[LET_THM])
689    >> `rdepth seen t acc <= rdepth seen work acc - 1`
690        by metis_tac [rdepth_thm]
691    >> decide_tac
692QED
693
694Theorem step3b_lt[local] :
695 !seen work acc r x t arcs.
696    ~null work /\
697    (deleteFindMin work = ((r,x),t)) /\
698    ~mem_regexp r seen /\
699    (arcs = transitions r) /\ dom_Brz seen work acc
700    ==> rdepth (insert_regexp r seen)
701               (FOLDL (\work a. insert_regexp (SND a) work) t arcs)
702               (build_table arcs r acc)
703         <
704        rdepth seen work acc
705Proof
706 rw_tac std_ss [dom_Brz_def] THEN IMP_RES_TAC rdepth_thm
707  >> `rdepth seen work acc <> 0` by METIS_TAC [IS_SOME_Brz]
708  >> `rdepth seen work acc - 1 < rdepth seen work acc` by DECIDE_TAC
709  >> `IS_SOME (Brz (insert_regexp r seen)
710                   (FOLDL (\work a. insert_regexp (SND a) work) t (transitions r))
711                   (build_table (transitions r) r acc)
712                   (rdepth seen work acc - 1))`
713     by (qpat_x_assum `IS_SOME (Brz seen work acc (rdepth seen work acc))`
714              (mp_tac o SIMP_RULE arith_ss [Once Brz_def])
715         >> rw[LET_THM])
716    >> `rdepth (insert_regexp r seen)
717               (FOLDL (\work a. insert_regexp (SND a) work) t (transitions r))
718               (build_table (transitions r) r acc) <= rdepth seen work acc - 1`
719        by metis_tac [rdepth_thm]
720    >> decide_tac
721QED
722
723(*---------------------------------------------------------------------------*)
724(* Induction for Brzozo is obtained by instantiating the WF induction        *)
725(* theorem with the rdepth measure and then simplifying.                     *)
726(*---------------------------------------------------------------------------*)
727
728Theorem ind_lemA =
729  prim_recTheory.WF_measure
730   |> Q.ISPEC `\(seen,work,acc). rdepth seen work acc`
731   |> MATCH_MP relationTheory.WF_INDUCTION_THM
732   |> simp_rule std_ss [prim_recTheory.measure_thm]
733   |> Q.ISPEC `\(seen,work,acc).
734                  dom_Brz seen work acc ==> P seen work acc`
735   |> simp_rule std_ss [pairTheory.FORALL_PROD]
736;
737
738Theorem ind_lemB[local] :
739  !P. (!seen work acc.
740         (!a b c. rdepth a b c < rdepth seen work acc
741                  ==> dom_Brz a b c
742                  ==> P a b c)
743         ==> dom_Brz seen work acc ==> P seen work acc)
744  ==>
745  !seen work acc. dom_Brz seen work acc ==> P seen work acc
746Proof
747rpt strip_tac
748 >> pop_assum mp_tac
749 >> `?a b c. acc = (a,b,c)` by metis_tac [ABS_PAIR_THM]
750 >> pop_assum SUBST_ALL_TAC
751 >> MAP_EVERY Q.ID_SPEC_TAC [`a`,`b`, `c`,`work`,`seen`]
752 >> match_mp_tac ind_lemA
753 >> fs [FORALL_PROD]
754QED
755
756Theorem Brzozowski_ind :
757 !P.
758   (!seen work acc.
759       dom_Brz seen work acc /\
760       (!r x t. ~null work /\
761                (deleteFindMin work = ((r,x),t)) /\
762                mem_regexp r seen ==> P seen t acc) /\
763       (!r x t arcs.
764           ~null work /\ (deleteFindMin work = ((r,x),t)) /\
765           (arcs = transitions r) /\ ~mem_regexp r seen
766           ==> P (insert_regexp r seen)
767                 (FOLDL (\work a. insert_regexp (SND a) work) t arcs)
768                 (build_table arcs r acc))
769         ==> P seen work acc)
770  ==> !seen work acc. dom_Brz seen work acc ==> P seen work acc
771Proof
772  gen_tac
773  >> strip_tac
774  >> ho_match_mp_tac ind_lemB
775  >> simp_tac bool_ss [AND_IMP_INTRO]
776  >> rpt gen_tac
777  >> disch_then (fn th => first_x_assum match_mp_tac >> assume_tac th)
778  >> rw []
779  >> (first_x_assum match_mp_tac
780       >> asm_simp_tac list_ss [step3a_lt,step3b_lt]
781       >> mp_tac dom_Brz_eqns
782       >> rw [])
783QED
784
785(*---------------------------------------------------------------------------*)
786(* Efficient executable version of Brzozo                                    *)
787(*---------------------------------------------------------------------------*)
788
789Definition exec_Brz_def :
790  exec_Brz seen work acc d =
791    if d=0n then
792       (if dom_Brz seen work acc then Brzozo seen work acc else ARB)
793    else
794    if null work then
795          (seen,acc)
796    else
797      let (m,t) = deleteFindMin work ;
798              r = FST m;
799      in
800         if mem_regexp r seen then
801             exec_Brz seen t acc (d - 1)
802         else
803           let arcs = transitions r
804           in exec_Brz (insert_regexp r seen)
805                       (FOLDL (\work a. insert_regexp (SND a) work) t arcs)
806                       (build_table arcs r acc) (d - 1)
807End
808
809Theorem exec_Brz_equals_Brzozo[local] :
810  !d seen work acc.
811    dom_Brz seen work acc
812    ==>
813     (exec_Brz seen work acc d = Brzozo seen work acc)
814Proof
815 Induct
816  >> rw_tac std_ss [Once exec_Brz_def,LET_THM]
817  >- metis_tac [Brzozo_eqns]
818  >- (`?r x t. deleteFindMin work = ((r,x),t)` by metis_tac [ABS_PAIR_THM]
819      >> rw [Brzozo_eqns]
820      >> (first_x_assum match_mp_tac
821           >> qpat_x_assum `dom_Brz seen work acc`
822              (mp_tac o SIMP_RULE list_ss [Once dom_Brz_eqns])
823           >> rw []))
824QED
825
826(*---------------------------------------------------------------------------*)
827(* The final definition of the core functionality of the compiler            *)
828(*---------------------------------------------------------------------------*)
829
830Definition Brzozowski_def :
831  Brzozowski seen work acc =
832      if dom_Brz seen work acc then
833         Brzozo seen work acc
834      else
835         exec_Brz seen work acc MAXNUM_32
836End
837
838(*---------------------------------------------------------------------------*)
839(* Theorem showing that Brzozowski can be executed w.o. termination proof.   *)
840(* In order to evaluate Brzozowski, we can dispatch to exec_Brz.             *)
841(*---------------------------------------------------------------------------*)
842
843Theorem Brzozowski_exec_Brz :
844   Brzozowski seen work acc = exec_Brz seen work acc MAXNUM_32
845Proof
846 rw_tac std_ss [Brzozowski_def,exec_Brz_equals_Brzozo]
847QED
848
849(*---------------------------------------------------------------------------*)
850(* In order to reason about Brzozowski, we use the following theorem.        *)
851(*---------------------------------------------------------------------------*)
852
853Theorem Brzozowski_eqns :
854 dom_Brz seen work acc
855  ==>
856 Brzozowski seen work acc =
857    if null work then
858        (seen,acc)
859    else let (m,t) = deleteFindMin work;
860                 r = FST m;
861         in if mem_regexp r seen then
862                Brzozowski seen t acc
863            else
864              let arcs = transitions r
865              in Brzozowski (insert_regexp r seen)
866                            (FOLDL (\work a. insert_regexp (SND a) work) t arcs)
867                            (build_table arcs r acc)
868Proof
869rw_tac std_ss [Brzozowski_def,LET_THM]
870 >- metis_tac [Brzozo_eqns]
871 >- (`?r x t. deleteFindMin work = ((r,x),t)` by metis_tac [ABS_PAIR_THM]
872      >> rw []
873      >> rw [SimpLHS, Once Brzozo_eqns]
874      >> (qpat_x_assum `dom_Brz seen work acc`
875              (mp_tac o SIMP_RULE list_ss [Once dom_Brz_eqns])
876          >> rw []))
877QED
878
879(*---------------------------------------------------------------------------*)
880(* Definitions that find the accept states and map the final results to      *)
881(* vector form.                                                              *)
882(*---------------------------------------------------------------------------*)
883
884Definition get_accepts_def :
885  get_accepts fmap =
886    mergesort (\a b:num. a <= b)
887       (balanced_map$foldrWithKey
888         (\r (n:num) nlist. if nullable r then n::nlist else nlist)
889         [] fmap)
890End
891
892Definition accepts_to_vector_def:
893  accepts_to_vector accept_states max_state =
894     alist_to_vec (MAP (\x. (x,T)) accept_states) F max_state max_state
895End
896
897Definition table_to_vectors_def:
898  table_to_vectors table =
899     MAP (\(k:num,table2).
900             alist_to_vec (mergesort (inv_image (\a b:num. a <= b) FST)
901                                (MAP (\(c,n). (c, SOME n)) table2))
902                          NONE alphabet_size alphabet_size)
903         (mergesort (inv_image (\a b:num. a <= b) FST) table)
904End
905
906(*---------------------------------------------------------------------------*)
907(* The basic regexp compiler function                                        *)
908(*---------------------------------------------------------------------------*)
909
910Definition compile_regexp_def :
911  compile_regexp r =
912    let r' = normalize r ;
913        (states,last_state,state_numbering,table)
914          = Brzozowski balanced_map$empty
915                       (balanced_map$singleton r' ())
916                       (1n, balanced_map$singleton r' 0n, []) ;
917        delta_vecs = table_to_vectors table ;
918        accepts_vec = accepts_to_vector (get_accepts state_numbering) last_state
919    in
920      (state_numbering, delta_vecs, accepts_vec)
921End
922
923(*---------------------------------------------------------------------------*)
924(* DFA evaluator                                                             *)
925(*---------------------------------------------------------------------------*)
926
927Definition exec_dfa_def :
928 exec_dfa finals table n s =
929   case s of
930    | "" => sub finals n
931    | c::t =>
932      case sub (sub table n) (ORD c) of
933       | NONE => F
934       | SOME k => exec_dfa finals table k t
935End
936
937Theorem exec_dfa_thm :
938  (exec_dfa finals table n [] = sub finals n) /\
939  (exec_dfa finals table n (c::t) =
940    case sub (sub table n) (ORD c) of
941     | NONE => F
942     | SOME k => exec_dfa finals table k t)
943Proof
944  CONJ_TAC
945   >- rw_tac list_ss [exec_dfa_def]
946   >- rw_tac list_ss [SimpLHS, Once exec_dfa_def]
947QED
948
949(*---------------------------------------------------------------------------*)
950(* Set of strings accepted by DFA generated from regexp r                    *)
951(*---------------------------------------------------------------------------*)
952
953Definition Brz_lang_def :
954  Brz_lang r =
955    let (state_map,table_vec,finals_vec) = compile_regexp r ;
956        tableV = fromList (MAP fromList table_vec) ;
957        finalsV = fromList finals_vec ;
958    in
959       exec_dfa finalsV tableV
960         (THE (lookup regexp_compare (normalize r) state_map))
961End
962
963
964(*---------------------------------------------------------------------------*)
965(* Combines the DFA compilation and the DFA evaluator. Returns a function of *)
966(* type :string -> bool                                                      *)
967(*---------------------------------------------------------------------------*)
968
969Definition regexp_matcher_def :
970 regexp_matcher r =
971    let (state_map,delta,accepts) = compile_regexp r ;
972        start_state = THE (lookup regexp_compare (normalize r) state_map) ;
973        acceptsV = fromList accepts ;
974        deltaV = fromList (MAP fromList delta) ;
975    in
976      exec_dfa acceptsV deltaV start_state
977End
978
979(*---------------------------------------------------------------------------*)
980(* Properties of get_accepts                                                 *)
981(*---------------------------------------------------------------------------*)
982
983Theorem sorted_get_accepts :
984  !fmap.
985     SORTED $<= (get_accepts fmap)
986Proof
987 RW_TAC set_ss [get_accepts_def] THEN
988 CONV_TAC (DEPTH_CONV ETA_CONV) THEN
989 MATCH_MP_TAC mergesort_sorted THEN
990 RW_TAC arith_ss [transitive_def, total_def]
991QED
992
993
994Theorem mem_get_acceptsA[local] :
995 !fmap P x acc.
996    balanced_map$invariant regexp_compare fmap /\
997    MEM x (foldrWithKey
998               (\r n nlist. if P r then n::nlist else nlist)
999               acc fmap)
1000     ==>
1001    MEM x acc \/ ?r. P r /\ (lookup regexp_compare r fmap = SOME x)
1002Proof
1003 Induct THEN RW_TAC std_ss []
1004  >- full_simp_tac list_ss [foldrWithKey_def,lookup_def]
1005  >- (simp_tac list_ss [lookup_bin]
1006       >>
1007      `invariant regexp_compare fmap /\
1008       invariant regexp_compare fmap' /\
1009       key_ordered regexp_compare k fmap Greater /\
1010       key_ordered regexp_compare k fmap' Less`
1011           by metis_tac [invariant_def]
1012      >> qpat_x_assum `invariant cmp (Bin n k v fmap fmap')` (K ALL_TAC)
1013      >> fs []
1014      >> RULE_ASSUM_TAC (REWRITE_RULE [foldrWithKey_def])
1015      >> RES_THEN MP_TAC >> BETA_TAC >> rw[]
1016        >- metis_tac [regexp_compare_eq]
1017        >- metis_tac [key_less_lookup,regexp_compare_good,good_cmp_thm]
1018        >- metis_tac [key_greater_lookup,regexp_compare_good,good_cmp_thm]
1019        >- metis_tac [key_less_lookup,regexp_compare_good,good_cmp_thm]
1020        >- metis_tac [key_greater_lookup,regexp_compare_good,good_cmp_thm])
1021QED
1022
1023Theorem mem_foldrWithKey_mono[local] :
1024 !P fmap x acc.
1025    MEM x acc
1026     ==>
1027     MEM x (foldrWithKey
1028               (\r n nlist. if P r then n::nlist else nlist)
1029               acc fmap)
1030Proof
1031  gen_tac
1032   >> Induct
1033   >> RW_TAC std_ss [foldrWithKey_def]
1034   >> metis_tac [MEM]
1035QED
1036
1037Theorem mem_get_acceptsB[local] :
1038 !P fmap r x acc.
1039    P r /\ (lookup regexp_compare r fmap = SOME x)
1040     ==>
1041    MEM x (foldrWithKey (\r n nlist. if P r then n::nlist else nlist) acc fmap)
1042Proof
1043  gen_tac >> Induct
1044  >- rw_tac list_ss [lookup_def,foldrWithKey_def]
1045  >- (rw_tac list_ss [lookup_bin,regexp_compare_eq,foldrWithKey_def]
1046       >> metis_tac [mem_foldrWithKey_mono,MEM])
1047QED
1048
1049Theorem mem_get_accepts[local] :
1050 !bmap x.
1051    invariant regexp_compare bmap
1052    ==>
1053     (MEM x (get_accepts bmap)
1054       <=>
1055      ?r. nullable r /\
1056          (lookup regexp_compare r bmap = SOME x))
1057Proof
1058 rw_tac list_ss [get_accepts_def,mergesort_mem]
1059  >> metis_tac [mem_get_acceptsA,mem_get_acceptsB,MEM]
1060QED
1061
1062(*---------------------------------------------------------------------------*)
1063(* Properties of extend_states                                               *)
1064(*---------------------------------------------------------------------------*)
1065
1066Definition extend_states_inv_def :
1067  extend_states_inv next_state state_map table =
1068     (invariant regexp_compare state_map /\
1069      fmap_inj regexp_compare state_map /\
1070      (frange regexp_compare state_map = count next_state) /\
1071      (!n. MEM n (MAP SND table) ==> n < next_state))
1072End
1073
1074Theorem extend_states_inv[local] :
1075 !next_state state_map table states next_state' state_map' table'.
1076     (extend_states next_state state_map table states
1077         = (next_state',state_map',table')) /\
1078     extend_states_inv next_state state_map table
1079    ==>
1080    extend_states_inv next_state' state_map' table'
1081Proof
1082 ho_match_mp_tac extend_states_ind
1083  >> rw [extend_states_def]
1084  >> BasicProvers.EVERY_CASE_TAC
1085  >> fs []
1086  >> FIRST_X_ASSUM match_mp_tac
1087  >> rw []
1088  >> fs [extend_states_inv_def, frange_def,
1089         MEM_MAP,EXTENSION,GSYM LEFT_FORALL_IMP_THM]
1090  >> `good_cmp regexp_compare /\ eq_cmp regexp_compare`
1091        by metis_tac [eq_cmp_regexp_compare,regexp_compare_good]
1092  >> rw_tac set_ss []
1093  >- metis_tac [insert_thm]
1094  >- (fs [fmap_inj_def,lookup_insert_thm]
1095       >> rw [regexp_compare_eq,fdom_insert]
1096       >> metis_tac [DECIDE ``x < (x:num) ==> F``,eq_cmp_def])
1097  >- (rw [lookup_insert_thm,regexp_compare_eq,EQ_IMP_THM]
1098       >- (pop_assum mp_tac >> rw_tac set_ss []
1099            >> metis_tac [DECIDE ``x < x + 1n``,LESS_TRANS])
1100       >- (`x < next_state \/ (x=next_state)` by DECIDE_TAC
1101             >> metis_tac [NOT_SOME_NONE]))
1102  >- rw_tac set_ss []
1103  >- metis_tac [LESS_TRANS,DECIDE ``x < x+1n``,IN_DEF]
1104  >- (rw_tac set_ss [] >> metis_tac[])
1105  >- metis_tac [LESS_TRANS,DECIDE ``x < x+1n``,IN_DEF]
1106QED
1107
1108
1109Theorem extend_states_thm :
1110 !next_state state_map table states next_state' state_map' table'.
1111    (extend_states next_state state_map table states
1112       = (next_state',state_map',table')) /\
1113    invariant regexp_compare state_map
1114   ==>
1115  (fdom regexp_compare state_map' =
1116      (fdom regexp_compare state_map UNION set (MAP SND states))) /\
1117  submap regexp_compare state_map state_map' /\
1118  next_state <= next_state' /\
1119  (table' = REVERSE
1120             (MAP (\(c,r). (c, fapply regexp_compare state_map' r)) states)
1121            ++ table) /\
1122  invariant regexp_compare state_map'
1123Proof
1124 `eq_cmp regexp_compare` by metis_tac [eq_cmp_regexp_compare]
1125  >> ho_match_mp_tac extend_states_ind
1126  >> rw [extend_states_def]
1127  >> BasicProvers.EVERY_CASE_TAC
1128  >> `invariant regexp_compare (insert regexp_compare r' next_state state_map)`
1129       by metis_tac [insert_thm,eq_cmp_def]
1130  >> fs [submap_id]
1131  >> res_tac
1132  >- (rw_tac set_ss [fdom_insert] >> metis_tac[])
1133  >- (`r' IN fdom regexp_compare state_map` by rw [fdom_def]
1134       >> rw_tac set_ss [IN_SET_UNION])
1135  >- (match_mp_tac submap_insert >> rw [fdom_def] >> metis_tac [NOT_SOME_NONE])
1136  >- decide_tac
1137  >- (pat_elim `$! M` >> rw[]
1138       >> qpat_x_assum `submap _ __ ___` mp_tac
1139      >> rw_tac set_ss [submap_def,fapply_def]
1140      >> pop_assum (mp_tac o Q.SPEC `r'`)
1141      >> rw [fdom_insert]
1142      >> pop_assum mp_tac
1143      >> `good_cmp regexp_compare` by metis_tac [regexp_compare_good]
1144      >> rw [lookup_insert_thm,regexp_compare_eq]
1145      >> metis_tac [THE_DEF])
1146  >- (pat_elim `$! M` >> rw[]
1147       >> qpat_x_assum `submap _ __ ___` mp_tac
1148      >> rw_tac set_ss [submap_def,fapply_def]
1149      >> pop_assum (mp_tac o Q.SPEC `r'`)
1150      >> rw [fdom_def]
1151      >> metis_tac [THE_DEF])
1152QED
1153
1154(*---------------------------------------------------------------------------*)
1155(* What it means to be a good table                                          *)
1156(*---------------------------------------------------------------------------*)
1157
1158Definition good_table_def :
1159  good_table state_map table =
1160     (ALL_DISTINCT (MAP FST table) /\
1161     (!n table2.
1162        (ALOOKUP table n = SOME table2)
1163        ==>
1164        (set (MAP FST table2) = set ALPHABET)) /\
1165     (!n c n' table2.
1166        (ALOOKUP table n = SOME table2) /\
1167        (ALOOKUP table2 c = SOME n')
1168       ==>
1169       MEM c ALPHABET /\
1170       ?r r'.
1171         (lookup regexp_compare r state_map = SOME n) /\
1172         (lookup regexp_compare r' state_map = SOME n') /\
1173         (r' = smart_deriv c r)))
1174End
1175
1176(*---------------------------------------------------------------------------*)
1177(* Some abbreviations                                                        *)
1178(*---------------------------------------------------------------------------*)
1179
1180Definition invar_def : invar = invariant regexp_compare
1181End
1182
1183Definition dom_def   : dom = fdom regexp_compare
1184End
1185
1186Definition range_def : range = frange regexp_compare
1187End
1188
1189Definition union_def : union = ounion regexp_compare
1190End
1191
1192Definition set_def   : set = oset regexp_compare
1193End
1194
1195Definition image_def : image = oimage regexp_compare
1196End
1197
1198Definition apply_def : apply = fapply regexp_compare
1199End
1200
1201Definition set_of_def : set_of oset = fapply regexp_compare
1202End
1203
1204Theorem dom_union[local] :
1205 !a b.
1206    invariant regexp_compare a /\ invariant regexp_compare b
1207    ==>
1208   (dom (union a b) = (dom a) UNION (dom b))
1209Proof
1210 metis_tac [regexp_compare_good,fdom_ounion,dom_def,union_def]
1211QED
1212
1213(*---------------------------------------------------------------------------*)
1214(* Invariants on regexp compilation                                          *)
1215(*---------------------------------------------------------------------------*)
1216
1217Definition Brz_invariant_def :
1218 Brz_invariant seen todo (next_state,state_map,table) <=>
1219    invar todo /\ invar state_map /\ invar seen /\
1220    (!r. oin regexp_compare r todo ==> is_normalized r) /\
1221    (!r. mem_regexp r seen ==> is_normalized r) /\
1222    (!r. r IN dom state_map ==> is_normalized r) /\
1223    (dom (union seen todo) = dom state_map) /\
1224    (range state_map = count next_state) /\
1225    (set (MAP FST table) = fdom num_cmp (oimage num_cmp (apply state_map) seen)) /\
1226    fmap_inj regexp_compare state_map /\
1227    good_table state_map table
1228End
1229
1230Theorem triv_lem[local] :
1231  !s1 s2. (s1 = s2) ==> ((s1 UNION s) = (s2 UNION s))
1232Proof
1233 SET_TAC[]
1234QED
1235
1236Theorem Brz_inv_pres[local] :
1237 !todo1 x todos work seen next_state state_map table.
1238    ~null work /\
1239    (deleteFindMin work = ((todo1,x),todos)) /\
1240    Brz_invariant seen work (next_state,state_map,table) /\
1241    ~mem_regexp todo1 seen
1242   ==>
1243   Brz_invariant (insert_regexp todo1 seen)
1244                 (FOLDL (\work a. insert_regexp (SND a) work) todos (transitions todo1))
1245                 (build_table (transitions todo1)
1246                              todo1 (next_state,state_map,table))
1247Proof
1248 rw_tac list_ss [good_table_def,insert_regexp_def, invar_def,
1249                 Brz_invariant_def, build_table_def, transitions_def,set_def] >>
1250 imp_res_tac extend_states_thm
1251 >> `todo1 IN dom work`
1252          by metis_tac [deleteFindMin_thm,EXTENSION,IN_UNION,IN_INSERT,dom_def]
1253 >> `todo1 IN dom state_map` by metis_tac [dom_union,IN_UNION]
1254 >> `todo1 IN dom state_map'` by metis_tac [submap_def,dom_def]
1255 >> pop_assum (strip_assume_tac o SIMP_RULE set_ss [dom_def, fdom_def])
1256 >> rw []
1257 >> rw [Brz_invariant_def, good_table_def,invar_def]
1258 (* 13 subgoals *)
1259  >- metis_tac [deleteFindMin_thm,invariant_foldl]
1260  >- metis_tac [insert_thm,regexp_compare_good]
1261  >- (`MEM r (MAP SND (MAP (\c. (c,smart_deriv c todo1)) ALPHABET)) \/
1262       oin regexp_compare r todos`
1263         by metis_tac [oin_foldl_insert,deleteFindMin_thm]
1264       >- (fs [MEM_MAP,MAP_MAP_o,combinTheory.o_DEF]
1265           >> match_mp_tac smart_deriv_normalized
1266           >> `todo1 IN dom work`
1267                  by metis_tac [deleteFindMin_thm,dom_def]
1268           >> `oin regexp_compare todo1 work`
1269                  by metis_tac [oin_fdom,dom_def]
1270           >> metis_tac [])
1271       >- (fs [oin_fdom]
1272           >> `FDOM (to_fmap regexp_compare todos) =
1273               FDOM (DRESTRICT (to_fmap regexp_compare work)
1274                        (FDOM (to_fmap regexp_compare work) DELETE
1275                                key_set regexp_compare todo1))`
1276               by metis_tac [deleteFindMin,regexp_compare_good]
1277           >> fs [FDOM_DRESTRICT,INTER_DELETE]
1278           >> `{r} IN FDOM (to_fmap regexp_compare todos)`
1279                 by metis_tac[SING_IN_FDOM,eq_cmp_regexp_compare,deleteFindMin_thm]
1280           >> rfs[]
1281           >> metis_tac [SING_IN_FDOM,eq_cmp_regexp_compare,deleteFindMin_thm])
1282     )
1283  >- metis_tac [member_insert,mem_regexp_def,eq_cmp_regexp_compare,insert_thm,eq_cmp_def]
1284  >- (`r IN fdom regexp_compare state_map \/
1285       r IN set (MAP SND (MAP (\c. (c,smart_deriv c todo1)) ALPHABET))`
1286       by metis_tac [EXTENSION,dom_def,IN_UNION]
1287       >- metis_tac [dom_def]
1288       >- (fs [MAP_MAP_o,MEM_MAP] >> metis_tac [smart_deriv_normalized, dom_def]))
1289  >- (fs [GSYM dom_def] >> qpat_x_assum `dom _ = dom __` (assume_tac o SYM)
1290       >> rw [GSYM set_def]
1291       >> `invariant regexp_compare (insert regexp_compare todo1 () seen)`
1292            by metis_tac [insert_thm,regexp_compare_good]
1293       >> `dom (insert regexp_compare todo1 () seen) =
1294           dom (set [todo1]) UNION dom seen`
1295            by (rw [dom_def,fdom_insert,eq_cmp_regexp_compare,Once INSERT_SING_UNION]
1296                >> match_mp_tac triv_lem
1297                >> rw_tac bool_ss [EXTENSION,set_def]
1298                >> rw [in_dom_oset,eq_cmp_regexp_compare])
1299       >> `invariant regexp_compare
1300             (FOLDL (\work' a. insert regexp_compare (SND a) () work') todos
1301                (MAP (\c. (c,smart_deriv c todo1)) ALPHABET))`
1302           by metis_tac [invariant_foldl,deleteFindMin_thm]
1303       >> rw [dom_union]
1304       >> rw [EXTENSION]
1305       >> `invariant regexp_compare todos` by metis_tac [deleteFindMin_thm]
1306       >> rw [oin_foldl_insert |> SIMP_RULE std_ss [oin_fdom,GSYM dom_def]]
1307       >> rw [AC DISJ_COMM DISJ_ASSOC]
1308       >> qsuff_tac `x IN dom work <=> (x IN dom (set [todo1]) \/ x IN dom todos)`
1309           >- metis_tac[]
1310       >> `dom work = {todo1} UNION dom todos`
1311            by metis_tac [deleteFindMin_thm,dom_def]
1312       >> qsuff_tac `{todo1} = dom (set [todo1])`
1313           >- metis_tac[IN_UNION,EXTENSION]
1314       >> simp_tac list_ss [dom_def,set_def,dom_oset_lem,eq_cmp_regexp_compare])
1315  >- (`extend_states_inv next_state state_map ([]:(num,num) alist)`
1316               by (rw [extend_states_inv_def] >> metis_tac [range_def])
1317       >> imp_res_tac extend_states_inv
1318       >> fs [extend_states_inv_def, count_def, EXTENSION, range_def])
1319  >- (`todo1 IN dom state_map'` by (fs[submap_def] >> metis_tac[dom_def])
1320      >> `eq_cmp num_cmp /\ eq_cmp regexp_compare`
1321            by metis_tac[eq_cmp_regexp_compare,num_cmp_good,num_cmp_antisym,eq_cmp_def]
1322      >> rw [GSYM oinsert_def,fdom_oimage_insert]
1323      >> `apply state_map' todo1 = x'` by rw [apply_def,fapply_def]
1324      >> pop_assum SUBST_ALL_TAC
1325      >> AP_TERM_TAC
1326      >> `dom seen SUBSET dom state_map` by
1327           (qpat_x_assum `dom _ = dom state_map` (SUBST_ALL_TAC o SYM)
1328             >> metis_tac [invariant_oset, eq_cmp_def,dom_union,SUBSET_UNION])
1329      >> rw[fdom_oimage_inst,SET_EQ_THM,EQ_IMP_THM,oin_fdom]
1330      >> Q.EXISTS_TAC `x'''` >> rw[]
1331      >> `x''' IN dom state_map` by metis_tac [SUBSET_DEF,dom_def]
1332      >> `(x''' IN dom state_map') /\
1333          (lookup regexp_compare x''' state_map = lookup regexp_compare x''' state_map')`
1334           by metis_tac [submap_def,dom_def]
1335      >> rw [apply_def,fapply_def])
1336  >- (`extend_states_inv next_state state_map ([]:(num,num) alist)`
1337               by (rw [extend_states_inv_def] >> metis_tac [range_def])
1338       >> imp_res_tac extend_states_inv
1339       >> pop_assum mp_tac
1340       >> rw [extend_states_inv_def])
1341  >- (rw_tac set_ss [fdom_oimage_inst,GSYM IMP_DISJ_THM,oneTheory.one,
1342                     oin_fdom,fdom_def,apply_def,fapply_def]
1343       >> strip_tac
1344       >> `fmap_inj regexp_compare state_map'`
1345           by (`extend_states_inv next_state state_map ([]:(num,num) alist)`
1346                 by (rw [extend_states_inv_def] >> metis_tac [range_def])
1347               >> imp_res_tac extend_states_inv
1348               >> pop_assum mp_tac
1349               >> rw [extend_states_inv_def])
1350       >> `dom seen SUBSET dom state_map` by
1351           (`eq_cmp num_cmp /\ eq_cmp regexp_compare`
1352                by metis_tac[eq_cmp_regexp_compare,
1353                             num_cmp_good,num_cmp_antisym,eq_cmp_def]
1354             >> qpat_x_assum `dom _ = dom state_map` (SUBST_ALL_TAC o SYM)
1355             >> metis_tac [invariant_oset, eq_cmp_def,dom_union,SUBSET_UNION])
1356       >> pop_assum mp_tac
1357       >> rw_tac set_ss [dom_def, fdom_def, member_iff_lookup,SUBSET_DEF,oneTheory.one]
1358       >> Q.EXISTS_TAC `x''` >> rw[]
1359       >> strip_tac
1360       >> `x'' IN fdom regexp_compare state_map` by fs [SUBSET_DEF,dom_def, fdom_def]
1361       >> `(x'' IN fdom regexp_compare state_map') /\
1362           (lookup regexp_compare x'' state_map = lookup regexp_compare x'' state_map')`
1363             by (fs [submap_def,fdom_def, member_iff_lookup] >> metis_tac [])
1364       >> fs []
1365       >> `x'' <> todo1` by (fs[mem_regexp_def,member_iff_lookup] >> metis_tac[])
1366       >> metis_tac [fmap_inj_def,eq_cmp_regexp_compare, eq_cmp_def])
1367  >- (pop_assum mp_tac
1368        >> rw[MAP_MAP_o,combinTheory.o_DEF]
1369        >> fs[MAP_REVERSE,MAP_MAP_o,combinTheory.o_DEF]
1370        >> metis_tac[])
1371  >- (qpat_x_assum `(if _ then __ else ___) = ____` mp_tac
1372        >> rw[MAP_MAP_o,combinTheory.o_DEF]
1373             >- (imp_res_tac alistTheory.ALOOKUP_MEM
1374                  >> fs [MEM_REVERSE,MEM_MAP])
1375             >- metis_tac[])
1376  >- (qpat_x_assum `(if _ then __ else ___) = ____` mp_tac
1377        >> rw[MAP_MAP_o,combinTheory.o_DEF]
1378           >- (qexists_tac `todo1` >> rw[]
1379                  >> imp_res_tac alistTheory.ALOOKUP_MEM
1380                  >> pop_assum mp_tac
1381                  >> rw_tac set_ss [MEM_REVERSE,MEM_MAP,fapply_def]
1382                  >> `smart_deriv c todo1 IN fdom regexp_compare state_map'`
1383                      by (rw [MAP_MAP_o,combinTheory.o_DEF,MEM_MAP]
1384                           >> disj2_tac
1385                           >> qexists_tac `c` >> rw[]
1386                           >> metis_tac [IN_DEF])
1387                  >> fs [fdom_def, member_iff_lookup])
1388           >- (fs[submap_def, fdom_def, member_iff_lookup] >> metis_tac[]))
1389QED
1390
1391Theorem Brz_inv_thm[local] :
1392 !seen work acc.
1393   dom_Brz seen work acc
1394   ==>
1395    !seen' acc'.
1396      Brz_invariant seen work acc /\
1397      (Brzozowski seen work acc = (seen',acc'))
1398      ==>
1399       Brz_invariant seen' balanced_map$empty acc'
1400Proof
1401 simp_tac std_ss [empty_def]
1402 >> ho_match_mp_tac Brzozowski_ind
1403 >> rw []
1404 >> qpat_x_assum `Brzozowski _ __ ___ = _4` mp_tac
1405 >> rw [Brzozowski_eqns]
1406    >- (Cases_on `work` >> fs[null_def])
1407    >- (`?r work'. deleteFindMin work = ((r,()),work')`
1408            by metis_tac [ABS_PAIR_THM,oneTheory.one]
1409        >> fs[]
1410        >> BasicProvers.NORM_TAC set_ss []
1411        >> first_x_assum match_mp_tac
1412        >> rw[]
1413        >- (qpat_x_assum `Brz_invariant _ _ _` mp_tac
1414            >> `?next_state state_map table.
1415                  acc = (next_state,state_map,table)` by metis_tac[ABS_PAIR_THM]
1416            >> pop_assum subst_all_tac
1417            >> rw [Brz_invariant_def]
1418               >- metis_tac [deleteFindMin_thm,invar_def]
1419               >- metis_tac[deleteFindMin_thm,invar_def,
1420                            oin_fdom,IN_UNION,IN_INSERT,EXTENSION]
1421               >- (qpat_x_assum `dom (union seen work) = dom state_map`
1422                                (subst_all_tac o SYM)
1423                   >> fs [invar_def,dom_def,union_def, mem_regexp_def,GSYM oin_def]
1424                   >> `invariant regexp_compare work' /\
1425                       fdom regexp_compare work =
1426                       {r} UNION fdom regexp_compare work'`
1427                       by metis_tac[deleteFindMin_thm,invar_def]
1428                   >> fs [GSYM oin_fdom,EXTENSION,oin_ounion,good_oset_def,
1429                          regexp_compare_good]
1430                   >> metis_tac[]))
1431        >- metis_tac [Brz_inv_pres,ABS_PAIR_THM])
1432QED
1433
1434
1435Theorem Brz_mono[local] :
1436 !seen work acc.
1437   dom_Brz seen work acc
1438   ==>
1439   !seen' acc'.
1440     Brz_invariant seen work acc /\
1441     (Brzozowski seen work acc = (seen',acc'))
1442     ==>
1443     submap regexp_compare (FST (SND acc)) (FST(SND acc'))
1444Proof
1445 ho_match_mp_tac Brzozowski_ind
1446 >> rw[]
1447 >> qpat_x_assum `Brzozowski _ __ ___ = _4` mp_tac
1448 >> rw [Brzozowski_eqns]
1449 >> fs []
1450  >- metis_tac [submap_id]
1451  >- (`?r work'. deleteFindMin work = ((r,()),work')`
1452            by metis_tac [ABS_PAIR_THM,oneTheory.one]
1453      >> fs[]
1454      >> BasicProvers.NORM_TAC set_ss []
1455       >- (first_assum match_mp_tac
1456           >> qexists_tac `seen'` >> rw[]
1457           >> qpat_x_assum `Brz_invariant seen work acc` mp_tac
1458           >> `?next_state state_map table.
1459                     acc = (next_state,state_map,table)` by metis_tac[ABS_PAIR_THM]
1460           >> pop_assum subst_all_tac
1461           >> rw [Brz_invariant_def,invar_def]
1462            >- metis_tac [deleteFindMin_thm]
1463            >- metis_tac[deleteFindMin_thm,invar_def,
1464                         oin_fdom,IN_UNION,IN_INSERT,EXTENSION]
1465            >- (qpat_x_assum `dom (union seen work) = dom state_map`
1466                             (subst_all_tac o SYM)
1467                >> fs [invar_def,dom_def,union_def,
1468                       mem_regexp_def,GSYM oin_def]
1469                >> `invariant regexp_compare work' /\
1470                    fdom regexp_compare work =
1471                    {r} UNION fdom regexp_compare work'`
1472                          by metis_tac[deleteFindMin_thm,invar_def]
1473                >> fs [GSYM oin_fdom,EXTENSION,oin_ounion,
1474                       good_oset_def,regexp_compare_good]
1475                >> metis_tac[])
1476          )
1477       >- (`?next_state state_map table.
1478               acc = (next_state,state_map,table)` by metis_tac[ABS_PAIR_THM]
1479           >> pop_assum subst_all_tac
1480           >> imp_res_tac Brz_inv_pres
1481           >> fs []
1482           >> match_mp_tac submap_trans
1483           >> qexists_tac
1484              `FST (SND (build_table (transitions r) r
1485                                     (next_state,state_map,table)))`
1486           >> fs[]
1487           >> rw[build_table_def,LET_THM]
1488           >> `?next_state' state_map' trans'.
1489                 extend_states next_state state_map [] (transitions r)
1490                  = (next_state',state_map', trans')`
1491                by metis_tac[ABS_PAIR_THM]
1492           >> rw[]
1493           >> `invariant regexp_compare state_map` by metis_tac [Brz_invariant_def,invar_def]
1494           >> imp_res_tac extend_states_thm
1495           >> CASE_TAC
1496           >> rw[]
1497           >> match_mp_tac submap_mono
1498           >> rw[eq_cmp_regexp_compare]
1499           >> strip_tac
1500           >> `r IN fdom regexp_compare state_map'` by metis_tac[submap_def]
1501           >> pop_assum mp_tac
1502           >> simp_tac set_ss [fdom_def]
1503           >> metis_tac[NOT_SOME_NONE]))
1504QED
1505
1506Theorem Brz_invariant_final[local] :
1507 !seen next_state state_map table.
1508   Brz_invariant seen empty (next_state,state_map,table)
1509    ==>
1510    (next_state = LENGTH table) /\
1511    PERM (MAP FST table) (COUNT_LIST (LENGTH table)) /\
1512    (!x. MEM x (get_accepts state_map) ==> x < LENGTH table) /\
1513    (!n l c. (ALOOKUP table n = SOME l) /\ (?x. ALOOKUP l c = SOME x) ==> MEM c ALPHABET)
1514Proof
1515 simp [Brz_invariant_def,invar_def,set_def,ounion_oempty,union_def] >>
1516 rpt gen_tac >> strip_tac >> conj_asm1_tac
1517 >- (`!x. x IN set (MAP FST table) ==> x < next_state`
1518           by (fs [count_def, EXTENSION, range_def, frange_def,dom_def]
1519                >> rw [fdom_oimage_inst,apply_def,oin_fdom]
1520                >> fs [fdom_def,fapply_def]
1521                >> metis_tac [THE_DEF,ounion_oempty,oempty_def])
1522     >>
1523     `LENGTH table = CARD (set (MAP FST table))`
1524           by metis_tac [LENGTH_MAP,ALL_DISTINCT_CARD_LIST_TO_SET, good_table_def]
1525     >>
1526     `fdom num_cmp (oimage num_cmp (apply state_map) seen) = range state_map`
1527       by (simp_tac set_ss [range_def, frange_def] >>
1528           qpat_x_assum `dom _ = dom __` mp_tac >>
1529           rw [fdom_oimage_inst,apply_def, oin_fdom,fapply_def, dom_def] >>
1530           fs[fdom_def,ounion_oempty, GSYM oempty_def,EXTENSION] >>
1531           metis_tac[THE_DEF])
1532     >> rw [])
1533 >> conj_asm1_tac
1534   >- (match_mp_tac PERM_ALL_DISTINCT
1535       >> rw [all_distinct_count_list, MEM_COUNT_LIST]
1536        >- fs [good_table_def]
1537        >- (`fdom num_cmp (oimage num_cmp (apply state_map) seen) = range state_map`
1538             by (simp_tac set_ss [range_def, frange_def] >>
1539                 qpat_x_assum `dom _ = dom __` mp_tac >>
1540                 simp_tac set_ss [ounion_oempty, GSYM oempty_def] >>
1541                 rw [fdom_oimage_inst,apply_def, oin_fdom,fapply_def, dom_def] >>
1542                 fs [fdom_def,EXTENSION] >> metis_tac[THE_DEF])
1543            >> qpat_x_assum `set (MAP FST table) = _` (subst_all_tac o SYM)
1544            >> rw[]))
1545 >> conj_tac
1546 >- (rw [mem_get_accepts]
1547     >> `x IN range state_map`
1548           by (simp_tac set_ss [range_def, frange_def] >> metis_tac[])
1549     >> rfs [count_def])
1550 >- (rw [] >> fs [good_table_def] >> metis_tac [])
1551QED
1552
1553
1554(*---------------------------------------------------------------------------*)
1555(* table_lang parallels the definition of exec_dfa                           *)
1556(*---------------------------------------------------------------------------*)
1557
1558Definition table_lang_def :
1559  (table_lang final_states table n [] = MEM n final_states) /\
1560  (table_lang final_states table n (c::t) =
1561     case ALOOKUP table n of
1562       | NONE => F
1563       | SOME table2 =>
1564           case ALOOKUP table2 c of
1565             | NONE => F
1566             | SOME n' => table_lang final_states table n' t)
1567End
1568
1569Theorem table_lang_correct[local] :
1570 !finals table state_map.
1571   fmap_inj regexp_compare state_map /\
1572   good_table state_map table /\
1573   (set (MAP FST table) = frange regexp_compare state_map) /\
1574   (!n r. (lookup regexp_compare r state_map = SOME n) ==> (MEM n finals <=> nullable r))
1575   ==>
1576   !n r s.
1577     (!c. MEM c s ==> MEM c ALPHABET) /\
1578     (lookup regexp_compare r state_map = SOME n)
1579     ==>
1580     (table_lang finals table n s <=> smart_deriv_matches r (MAP CHR s))
1581Proof
1582 rpt gen_tac >>
1583 strip_tac >>
1584 Induct_on `s` >>
1585 rw [table_lang_def, smart_deriv_matches_def] >>
1586 fs [good_table_def] >>
1587 BasicProvers.EVERY_CASE_TAC >>
1588 rw []
1589 >- (imp_res_tac alistTheory.ALOOKUP_NONE
1590     >> fs [EXTENSION]
1591     >> `n NOTIN frange regexp_compare state_map` by metis_tac[]
1592     >> fs [frange_def])
1593 >- metis_tac [alistTheory.ALOOKUP_NONE]
1594 >- (`?r'.(lookup regexp_compare r' state_map = SOME n) /\
1595          (lookup regexp_compare (smart_deriv h r') state_map =
1596           SOME x')` by metis_tac []
1597     >> `table_lang finals table x' s <=>
1598         smart_deriv_matches (smart_deriv h r') (MAP CHR s)`
1599         by metis_tac[]
1600     >> pop_assum SUBST_ALL_TAC
1601     >> `r' = r`
1602         by (qpat_x_assum `fmap_inj _ __` mp_tac
1603              >> rw_tac set_ss [fmap_inj_def,fdom_def,regexp_compare_eq])
1604     >> rw []
1605     >> metis_tac [ORD_CHR_lem,mem_alphabet])
1606QED
1607
1608Theorem vec_lang_lem1[local] :
1609 !(n:num).
1610    MEM n finals_list <=> (ALOOKUP (MAP (\x. (x,T)) finals_list) n = SOME T)
1611Proof
1612 rw [EQ_IMP_THM]
1613  >- rw [alistTheory.ALOOKUP_TABULATE]
1614  >- (`MEM (n,T) (MAP (\x. (x,T)) finals_list)` by METIS_TAC [alistTheory.ALOOKUP_MEM]
1615      >> fs[MEM_MAP])
1616QED
1617
1618Theorem vec_lang_lem2 =
1619  alistTheory.alookup_stable_sorted
1620     |> Q.INST_TYPE [`:'a` |-> `:num`, `:'b` |-> `:(num, 'a) alist`]
1621     |> Q.SPECL [`$<=`, `mergesort`, `n`, `table`]
1622     |> SIMP_RULE (srw_ss()++ARITH_ss) [transitive_def, total_def, mergesort_STABLE_SORT];
1623
1624Theorem vec_lang_lem3 =
1625  alistTheory.alookup_stable_sorted
1626     |> Q.INST_TYPE [`:'a` |-> `:num`, `:'b` |-> `:'a option`]
1627     |> Q.SPECL [`$<=`, `mergesort`, `n`, `MAP (\(c,n). (c,SOME n)) table2`]
1628     |> SIMP_RULE (srw_ss()++ARITH_ss)
1629           [transitive_def, total_def, mergesort_STABLE_SORT, stringTheory.char_le_def];
1630
1631Theorem vec_lang_lem4[local] :
1632 !l x. ALOOKUP (MAP (\(c,n). (c,SOME n)) l) x = OPTION_MAP SOME (ALOOKUP l x)
1633Proof
1634 Induct_on `l` >>
1635 rw [] >>
1636 PairCases_on `h` >>
1637 rw [] >>
1638 fs [stringTheory.ORD_11]
1639QED
1640
1641
1642Theorem table_to_vec_thm[local] :
1643 !table final_states max_char max_state table' final_states'.
1644    (max_char = alphabet_size) /\
1645    SORTED $<= final_states /\
1646    (!x. MEM x final_states ==> x < max_state) /\
1647    (!n l c. (ALOOKUP table n = SOME l) /\
1648             (?x. ALOOKUP l c = SOME x) ==> c < max_char) /\
1649    PERM (MAP FST table) (COUNT_LIST (LENGTH table)) /\
1650    (table_to_vectors table = table') /\
1651    (accepts_to_vector final_states max_state = final_states')
1652  ==>
1653   (LENGTH table' = LENGTH table) /\
1654   (LENGTH final_states' = max_state) /\
1655   (!n. MEM n final_states <=> n < LENGTH final_states' /\ EL n final_states') /\
1656   (!n. (?l. ALOOKUP table n = SOME l) <=> n < LENGTH table') /\
1657   (!n l. n < LENGTH table' ==> (LENGTH (EL n table') = max_char)) /\
1658   (!n c x.
1659     (?l. (ALOOKUP table n = SOME l) /\ (ALOOKUP l c = SOME x)) <=>
1660     n < LENGTH table' /\
1661     c < LENGTH (EL n table') /\ (EL c (EL n table') = SOME x))
1662Proof
1663 simp [good_table_def, table_to_vectors_def,accepts_to_vector_def] >>
1664 rpt gen_tac >>
1665 strip_tac >>
1666 qabbrev_tac `sorted_table = mergesort (inv_image $<= FST) table` >>
1667 `LENGTH sorted_table = LENGTH table` by metis_tac [mergesort_perm, PERM_LENGTH] >>
1668 `SORTS mergesort (inv_image $<= (FST : num # (num,'a) alist -> num))`
1669          by metis_tac [mergesort_STABLE_SORT, STABLE_DEF,
1670                        leq_thm, total_inv_image, transitive_inv_image] >>
1671 `MAP FST sorted_table = COUNT_LIST (LENGTH table)`
1672            by (match_mp_tac sorted_perm_count_list >>
1673                fs [SORTS_DEF] >>
1674                rw [] >>
1675                metis_tac [PERM_TRANS, PERM_SYM, PERM_MAP]) >>
1676 simp [] >>
1677 conj_tac
1678 >- metis_tac[mergesort_perm,PERM_LENGTH] >>
1679 conj_tac
1680 >- (`SORTED $<= (MAP FST (MAP (\x.(x,T)) final_states))`
1681            by rw [MAP_MAP_o, combinTheory.o_DEF]
1682      >> metis_tac [alist_to_vec_thm]) >>
1683 conj_tac
1684 >- (`SORTED $<= (MAP FST (MAP (\x.(x,T)) final_states))`
1685            by rw [MAP_MAP_o, combinTheory.o_DEF]
1686     >> `LENGTH (alist_to_vec (MAP (\x. (x,T)) final_states)
1687                              F max_state max_state) = max_state`
1688           by METIS_TAC [alist_to_vec_thm]
1689    >> POP_ASSUM SUBST_ALL_TAC
1690    >> RW_TAC std_ss [EQ_IMP_THM]
1691       >- (`n < max_state` by METIS_TAC [] >>
1692           `ALOOKUP (MAP (\x. (x,T)) final_states) n = SOME T`
1693               by METIS_TAC [vec_lang_lem1] >> METIS_TAC [alist_to_vec_thm])
1694       >- (CCONTR_TAC >> fs []
1695           >> Cases_on `ALOOKUP (MAP (\x. (x,T)) final_states) n`
1696              >- (metis_tac [alist_to_vec_thm, vec_lang_lem1])
1697              >- (NTAC 2 (POP_ASSUM MP_TAC) >> rw [vec_lang_lem1]
1698                  >> DISCH_TAC >> fs[] >> rw[] >> metis_tac [alist_to_vec_thm])))
1699 >>
1700 conj_asm1_tac
1701 >- (rw [] >> eq_tac >> rw []
1702     >- (imp_res_tac alistTheory.ALOOKUP_MEM >>
1703         imp_res_tac MEM_PERM >>
1704         fs [MEM_MAP, MEM_COUNT_LIST] >>
1705         metis_tac [FST,mergesort_perm,PERM_LENGTH])
1706     >- (`ALOOKUP sorted_table n = SOME (EL n (MAP SND sorted_table))`
1707             by metis_tac [dense_alist_to_vec_thm,
1708                   LENGTH_COUNT_LIST,mergesort_perm,PERM_LENGTH] >>
1709         `ALOOKUP table n = SOME (EL n (MAP SND sorted_table))`
1710                by metis_tac [vec_lang_lem2] >>
1711         rw [])) >>
1712 `SORTS mergesort (inv_image $<= (FST : num # 'a option -> num))`
1713          by metis_tac [mergesort_STABLE_SORT, STABLE_DEF, leq_thm,
1714                        total_inv_image, transitive_inv_image] >>
1715 `(\a b:num. a <= b) = $<=` by metis_tac [] >> pop_assum SUBST_ALL_TAC >>
1716 simp [EL_MAP,length_mergesort] >>
1717 conj_tac
1718 >- (rw [] >>
1719     `?n' table2'. EL n sorted_table = (n',table2')`
1720          by metis_tac [pair_CASES] >> rw [] >>
1721     qabbrev_tac `sorted_table2 =
1722         mergesort (inv_image $<= FST) (MAP (\(c,n). (c,SOME n)) table2')` >>
1723     qabbrev_tac `table2 = EL n (MAP SND sorted_table)` >>
1724     `SORTED $<= (MAP FST sorted_table2)`
1725             by (fs [SORTS_DEF, sorted_map, leq_thm] >> metis_tac []) >>
1726     metis_tac [alist_to_vec_thm])
1727 >- (* last conjunct *)
1728 (fs [length_mergesort] >>
1729  rw [] >>  eq_tac >>  strip_tac >> res_tac >> simp [EL_MAP] >>
1730 `?n' table2'. EL n sorted_table = (n',table2')` by metis_tac [pair_CASES] >>
1731 rw [] >>
1732 qabbrev_tac `sorted_table2 =
1733      mergesort (inv_image $<= FST) (MAP (\(c,n). (c,SOME n)) table2')` >>
1734 qabbrev_tac `table2 = EL n (MAP SND sorted_table)` >>
1735 `SORTED $<= (MAP FST sorted_table2)`
1736         by (fs [SORTS_DEF, sorted_map, leq_thm] >> metis_tac [])
1737 >- metis_tac [alist_to_vec_thm]
1738 >- (imp_res_tac alist_to_vec_thm >>
1739     fs [AND_IMP_INTRO] >>
1740     FIRST_X_ASSUM match_mp_tac >>  rw [] >>
1741     `table2' = table2` by (UNABBREV_ALL_TAC >> rw [EL_MAP]) >> rw [] >>
1742     `ALOOKUP sorted_table n = SOME (EL n (MAP SND sorted_table))`
1743                by metis_tac [dense_alist_to_vec_thm, LENGTH_COUNT_LIST] >>
1744     `ALOOKUP table n = SOME (EL n (MAP SND sorted_table))`
1745                by metis_tac [vec_lang_lem2] >>
1746     rw [] >>
1747     fs [] >>
1748     `ALOOKUP (MAP (\(c,n). (c,SOME n)) table2) c = SOME (SOME x)`
1749              by metis_tac [vec_lang_lem4, OPTION_MAP_DEF] >>
1750     fs [Once (GSYM vec_lang_lem3)] >>
1751     metis_tac [NOT_SOME_NONE, alist_to_vec_thm])
1752 >- (REV_FULL_SIMP_TAC std_ss [EL_MAP] >>
1753     `table2' = table2` by (UNABBREV_ALL_TAC >> rw [EL_MAP]) >> fs [] >>
1754     res_tac >> simp [] >>
1755     Cases_on `ALOOKUP sorted_table2 c`
1756     >- (`ALOOKUP (MAP (\(c,n). (c,SOME n)) table2) c = NONE`
1757          by metis_tac [vec_lang_lem4, OPTION_MAP_DEF, vec_lang_lem3] >>
1758         metis_tac [NOT_SOME_NONE, alist_to_vec_thm])
1759    >- (`ALOOKUP sorted_table n = SOME (EL n (MAP SND sorted_table))`
1760           by metis_tac [dense_alist_to_vec_thm, LENGTH_COUNT_LIST] >>
1761        `ALOOKUP table n = SOME (EL n (MAP SND sorted_table))`
1762           by metis_tac [vec_lang_lem2] >>
1763        rw [] >>
1764        REV_FULL_SIMP_TAC std_ss [EL_MAP] >>
1765        `SOME x = x'` by metis_tac [SOME_11, alist_to_vec_thm] >>
1766        simp [] >>
1767        qunabbrev_tac `sorted_table2` >>
1768        fs [Once vec_lang_lem3] >>
1769        fs [vec_lang_lem4])))
1770QED
1771
1772
1773(*---------------------------------------------------------------------------*)
1774(* Well-formedness of final state vector                                     *)
1775(*---------------------------------------------------------------------------*)
1776
1777Definition good_vec_def :
1778 good_vec vec final_states
1779      <=>
1780    (!l c. MEM c ALPHABET /\ MEM l vec ==> c < LENGTH l) /\
1781    (!n1 c n2 l. n1 < LENGTH vec /\ (EL n1 vec = l) /\
1782                 c < LENGTH (EL n1 vec) /\
1783                 (EL c (EL n1 vec) = SOME n2) ==> n2 < LENGTH vec) /\
1784    (LENGTH final_states = LENGTH vec)
1785End
1786
1787
1788Theorem Brz_inv_to_good_vec[local] :
1789 !seen next_state state_map table.
1790   Brz_invariant seen empty (next_state,state_map,table) /\
1791    (table_to_vectors table = vec) /\
1792    (accepts_to_vector (get_accepts state_map) next_state = final_states)
1793    ==>
1794    (next_state = LENGTH table) /\
1795    good_vec vec final_states /\
1796    (!r n. (lookup regexp_compare r state_map = SOME n) ==> n < LENGTH vec)
1797Proof
1798 simp [good_vec_def]
1799  >> rpt gen_tac >> strip_tac
1800  >> imp_res_tac Brz_invariant_final
1801  >> fs [Brz_invariant_def,invar_def,
1802        ounion_oempty,GSYM oempty_def,union_def, set_def, oset_def]
1803  >>
1804  `(LENGTH vec = LENGTH table) /\
1805   (LENGTH final_states = LENGTH table) /\
1806   (!n. MEM n (get_accepts state_map) <=> n < LENGTH final_states /\ EL n final_states) /\
1807   (!n. (?l. ALOOKUP table n = SOME l) <=> n < LENGTH vec) /\
1808   (!n l. n < LENGTH vec ==> (LENGTH (EL n vec) = alphabet_size)) /\
1809   (!n c x.
1810     (?l. (ALOOKUP table n = SOME l) /\ (ALOOKUP l c = SOME x))
1811     <=>
1812       n < LENGTH vec /\ c < LENGTH (EL n vec) /\
1813       (EL c (EL n vec) = SOME x))`
1814     by
1815      (match_mp_tac table_to_vec_thm
1816        >> rw [sorted_get_accepts]
1817        >> metis_tac [mem_alphabet,alphabet_size_def])
1818  >> rw []
1819 >- (fs [MEM_EL] >> rw []
1820      >> metis_tac [EL_MEM,mem_alphabet,alphabet_size_def])
1821 >- (`!a. a < LENGTH table <=> MEM a (MAP FST table)`
1822        by metis_tac [MEM_PERM,MEM_COUNT_LIST]
1823      >> rw [fdom_oimage_inst,apply_def, oin_fdom,fapply_def,SYM dom_def]
1824      >> rw [dom_def,fdom_def, member_iff_lookup]
1825      >> metis_tac [good_table_def,THE_DEF])
1826 >- (`!a. a < LENGTH table <=> MEM a (MAP FST table)`
1827        by metis_tac [MEM_PERM,MEM_COUNT_LIST]
1828       >> rw [fdom_oimage_inst,apply_def, oin_fdom,fapply_def,SYM dom_def]
1829       >> simp_tac set_ss [dom_def,fdom_def, member_iff_lookup]
1830       >> metis_tac [THE_DEF])
1831QED
1832
1833Theorem compile_regexp_good_vec :
1834 !r state_map table final_states.
1835    dom_Brz empty (singleton (normalize r) ())
1836                 (1,singleton (normalize r) 0,[]) /\
1837    (compile_regexp r = (state_map, table, final_states))
1838    ==>
1839    good_vec table final_states /\
1840    (!r n. (lookup regexp_compare r state_map = SOME n) ==> n < LENGTH table) /\
1841    normalize r IN fdom regexp_compare state_map
1842Proof
1843 simp [compile_regexp_def]
1844  >> rpt gen_tac >> strip_tac
1845  >> fs []
1846  >> `?seen next_state state_map table.
1847         Brzozowski empty
1848               (singleton (normalize r) ())
1849               (1,singleton (normalize r) 0,[]) = (seen,next_state,state_map,table)`
1850       by metis_tac [pair_CASES]
1851  >> fs []
1852  >> `Brz_invariant empty (singleton (normalize r) ()) (1,singleton (normalize r) 0,[])`
1853     by
1854      (fs [Brz_invariant_def,invar_def,dom_def, normalize_thm, fmap_inj_def, good_table_def,
1855          singleton_thm,empty_def,invariant_def,mem_regexp_def,member_iff_lookup]
1856        >> rw [lookup_def]
1857           >- (fs[singleton_def,oin_def,member_def]
1858                >> BasicProvers.EVERY_CASE_TAC
1859                >- metis_tac[]
1860                >- metis_tac[eq_cmp_regexp_compare,eq_cmp_def,normalize_thm]
1861                >- metis_tac[])
1862           >- (fs [fdom_def, singleton_def,lookup_def]
1863                >> BasicProvers.EVERY_CASE_TAC
1864                >> rw []
1865                >> metis_tac [eq_cmp_regexp_compare, eq_cmp_def,normalize_thm])
1866           >- (rw [union_def,set_def,GSYM empty_def, GSYM oempty_def] >>
1867               rw [EXTENSION,fdom_def, osingleton_def, singleton_def, lookup_def] >>
1868               CASE_TAC)
1869           >- (rw [range_def,frange_def, EXTENSION, singleton_def,lookup_def,EQ_IMP_THM]
1870                >- (BasicProvers.EVERY_CASE_TAC >> rw [])
1871                >- (Q.EXISTS_TAC `normalize r` >> rw [regexp_compare_id] >> decide_tac))
1872           >- (rw [GSYM empty_def, GSYM oempty_def]
1873               >> rw [fdom_def, lookup_def, empty_def, oempty_def])
1874           >- (ntac 2 (pop_assum mp_tac)
1875                >> simp_tac set_ss [fdom_def,singleton_def,lookup_def]
1876                >> BasicProvers.EVERY_CASE_TAC
1877                >> rw[]
1878                >> metis_tac [eq_cmp_def, eq_cmp_regexp_compare]))
1879 >> imp_res_tac Brz_inv_thm
1880 >> imp_res_tac Brz_mono
1881 >> imp_res_tac Brz_inv_to_good_vec
1882 >> fs []
1883 >> rw []
1884    >- metis_tac []
1885    >- (`normalize r IN fdom regexp_compare (singleton (normalize r) 0)`
1886         by rw [submap_def,fdom_def,singleton_def,lookup_def,regexp_compare_id]
1887         >> metis_tac [submap_def])
1888QED
1889
1890Theorem vec_lang_correct :
1891 !table final_states max_char vec final_states'.
1892   (max_char = alphabet_size) /\
1893   SORTED $<= final_states /\
1894   (!x. MEM x final_states ==> x < LENGTH table) /\
1895   (!n l c. (ALOOKUP table n = SOME l) /\ (?x. ALOOKUP l c = SOME x) ==> c < max_char) /\
1896   (PERM (MAP FST table) (COUNT_LIST (LENGTH table))) /\
1897   (!n'. n' IN BIGUNION (IMAGE alist_range (alist_range table)) ==> n' < LENGTH table) /\
1898   (table_to_vectors table = vec) /\
1899   (accepts_to_vector final_states (LENGTH table) = final_states')
1900   ==>
1901   !n s. (!c. MEM c s ==> c < max_char) /\ n < LENGTH table
1902         ==>
1903        (table_lang final_states table n s
1904           <=>
1905         exec_dfa (fromList final_states')
1906                  (fromList (MAP fromList vec))
1907                  n
1908                  (MAP CHR s))
1909Proof
1910 rpt gen_tac
1911  >> strip_tac
1912  >> `(LENGTH vec = LENGTH table) /\
1913      (LENGTH final_states' = LENGTH table) /\
1914      (!n. MEM n final_states <=> n < LENGTH final_states' /\ EL n final_states') /\
1915      (!n. (?l. ALOOKUP table n = SOME l) <=> n < LENGTH vec) /\
1916      (!n l. n < LENGTH vec ==> (LENGTH (EL n vec) = max_char)) /\
1917      (!n c x.
1918        (?l. (ALOOKUP table n = SOME l) /\ (ALOOKUP l c = SOME x))
1919        <=>
1920        n < LENGTH vec /\ c < LENGTH (EL n vec) /\
1921        (EL c (EL n vec) = SOME x))`
1922        by (match_mp_tac table_to_vec_thm
1923             >> rw []
1924             >> metis_tac [])
1925  >> Induct_on `s`
1926  >> rw [table_lang_def, exec_dfa_thm,fromList_Vector,sub_def]
1927  >> `h < alphabet_size` by metis_tac []
1928  >> rw [ORD_CHR_lem]
1929  >> BasicProvers.EVERY_CASE_TAC
1930  >> fs []
1931     >- metis_tac [NOT_SOME_NONE]
1932     >- (`n < LENGTH (table_to_vectors table)` by metis_tac[]
1933         >> fs [EL_MAP,sub_def]
1934         >> metis_tac [SOME_11, NOT_SOME_NONE])
1935     >- (`n < LENGTH (table_to_vectors table)` by metis_tac[]
1936         >> fs [EL_MAP,sub_def]
1937         >> metis_tac [NOT_SOME_NONE])
1938     >- (`n < LENGTH (table_to_vectors table)` by metis_tac[]
1939         >> fs [EL_MAP,sub_def]
1940         >> `x' = x''` by metis_tac [SOME_11, NOT_SOME_NONE]
1941          >> rw []
1942          >> fs [fromList_Vector]
1943          >> FIRST_X_ASSUM match_mp_tac
1944          >> FIRST_X_ASSUM match_mp_tac
1945          >> rw [PULL_EXISTS]
1946          >> rw [alistTheory.alist_range_def, EXTENSION]
1947          >> metis_tac [])
1948QED
1949
1950
1951(*---------------------------------------------------------------------------*)
1952(* Main correctness theorem: strings accepted by DFA are just the strings    *)
1953(* specified by regexp r                                                     *)
1954(*---------------------------------------------------------------------------*)
1955
1956Theorem Brzozowski_correct :
1957 !r s.
1958   dom_Brz empty (singleton (normalize r) ())
1959           (1,singleton (normalize r) 0,[]) /\
1960   (!c. MEM c s ==> MEM (ORD c) ALPHABET)
1961   ==>
1962   (Brz_lang r s = regexp_lang (normalize r) s)
1963Proof
1964 rw [GSYM regexp_lang_smart_deriv, Brz_lang_def]
1965  >> UNABBREV_ALL_TAC
1966  >> fs [compile_regexp_def]
1967  >> `?seen next_state state_map table.
1968        Brzozowski empty (singleton (normalize r) ())
1969                  (1,singleton (normalize r) 0,[]) = (seen,next_state,state_map,table)`
1970       by metis_tac [pair_CASES,SND]
1971  >> fs [LET_THM]
1972  >> rw []
1973  >> `Brz_invariant empty (singleton (normalize r) ())
1974                   (1,singleton (normalize r) 0,[])`
1975      by (rw_tac set_ss [Brz_invariant_def,invar_def,dom_def, normalize_thm,
1976                      fmap_inj_def, good_table_def, singleton_thm,empty_def,
1977                      invariant_def,mem_regexp_def,member_iff_lookup]
1978           >- (fs[singleton_def,oin_def,member_def]
1979                >> BasicProvers.EVERY_CASE_TAC
1980                >- metis_tac[]
1981                >- metis_tac[eq_cmp_regexp_compare,eq_cmp_def,normalize_thm]
1982                >- metis_tac[])
1983           >- fs [lookup_def]
1984           >- (fs [fdom_def, singleton_def,lookup_def]
1985                >> BasicProvers.EVERY_CASE_TAC
1986                >> rw []
1987                >> metis_tac [eq_cmp_regexp_compare, eq_cmp_def,normalize_thm])
1988           >- (rw [union_def,set_def,GSYM empty_def, GSYM oempty_def] >>
1989               rw [EXTENSION,fdom_def, osingleton_def, singleton_def, lookup_def] >>
1990               CASE_TAC)
1991           >- (rw [range_def,frange_def, EXTENSION, singleton_def,lookup_def,EQ_IMP_THM]
1992                >- (BasicProvers.EVERY_CASE_TAC >> rw [])
1993                >- (Q.EXISTS_TAC `normalize r` >> rw [regexp_compare_id] >> decide_tac))
1994           >- (rw [GSYM empty_def, GSYM oempty_def]
1995               >> rw [fdom_def, lookup_def, empty_def, oempty_def])
1996           >- (ntac 2 (pop_assum mp_tac)
1997                >> simp_tac set_ss [fdom_def,singleton_def,lookup_def]
1998                >> BasicProvers.EVERY_CASE_TAC
1999                >> rw[]
2000                >> metis_tac [eq_cmp_def, eq_cmp_regexp_compare])
2001           >- fs [alistTheory.ALOOKUP_def]
2002           >- fs [alistTheory.ALOOKUP_def]
2003           >- fs [alistTheory.ALOOKUP_def])
2004 >> imp_res_tac Brz_inv_thm
2005 >> imp_res_tac Brz_mono
2006 >> imp_res_tac Brz_invariant_final
2007 >> fs [Brz_invariant_def]
2008 >> `smart_deriv_matches (normalize r) s
2009      <=>
2010     table_lang (get_accepts state_map) table (apply state_map (normalize r)) (MAP ORD s)`
2011     by (fs [invar_def,set_def,ounion_oempty,union_def]
2012          >> `fdom num_cmp (oimage num_cmp (apply state_map) seen)
2013               = frange regexp_compare state_map`
2014                    by (simp_tac set_ss [frange_def] >>
2015                        qpat_x_assum `dom _ = dom __` mp_tac >>
2016                        rw [fdom_oimage_inst,apply_def, oin_fdom,fapply_def, dom_def] >>
2017                        fs[ounion_oempty, GSYM oempty_def] >>
2018                        simp_tac set_ss [fdom_def] >> metis_tac[THE_DEF])
2019          >> mp_tac (table_lang_correct
2020                   |> INST_TYPE [alpha |-> ``:num``]
2021                   |> Q.SPEC `get_accepts state_map`
2022                   |> Q.SPEC `table:(num,(num,num)alist)alist`
2023                   |> Q.SPEC `state_map:(regexp,num)balanced_map`)
2024          >> fs[]
2025          >> `(!n r. (lookup regexp_compare r state_map = SOME n) ==>
2026                     (MEM n (get_accepts state_map) <=> nullable r))`
2027               by (rw [mem_get_accepts,EQ_IMP_THM]
2028                    >- (`r' IN fdom regexp_compare state_map` by rw [fdom_def]
2029                         >> metis_tac [fmap_inj_def, eq_cmp_regexp_compare, eq_cmp_def])
2030                    >- metis_tac[])
2031          >> rw []
2032          >> pop_assum mp_tac
2033          >> imp_res_tac string_to_numlist
2034          >> DISCH_THEN (assume_tac o Q.SPECL [`0n`, `normalize r`, `nl`])
2035          >> rfs[]
2036          >> rw[]
2037          >> `normalize r IN fdom regexp_compare (singleton (normalize r) 0)`
2038                by (rw [fdom_def, singleton_def,lookup_def,EQ_IMP_THM]
2039                     >> BasicProvers.EVERY_CASE_TAC
2040                     >> fs [regexp_compare_id])
2041          >> `normalize r IN fdom regexp_compare state_map /\
2042              (lookup regexp_compare (normalize r) (singleton (normalize r) 0) =
2043               lookup regexp_compare (normalize r) state_map)`
2044              by metis_tac [submap_def]
2045          >> fs [singleton_def,lookup_def,regexp_compare_id]
2046          >> qpat_x_assum `table_lang (get_accepts state_map) table 0 nl <=>
2047                         smart_deriv_matches (normalize r) (MAP CHR nl)`
2048             (SUBST_ALL_TAC o SYM)
2049          >> rw [MAP_MAP_o,combinTheory.o_DEF,apply_def, fapply_def]
2050          >> pop_assum (SUBST_ALL_TAC o SYM)
2051          >> rw []
2052          >> `MAP (\x. ORD (CHR x)) nl = MAP (\x.x) nl`
2053              by (irule MAP_CONG >> rw [] >> metis_tac [mem_alphabet,ORD_CHR_lem])
2054         >> rw[])
2055 >> rw []
2056 >> imp_res_tac string_to_numlist
2057 >> rw [MAP_MAP_o,combinTheory.o_DEF,apply_def, fapply_def]
2058 >> `MAP (\x. ORD (CHR x)) nl = MAP (\x.x) nl`
2059              by (irule MAP_CONG >> rw [] >> metis_tac [mem_alphabet,ORD_CHR_lem])
2060 >> rw []
2061 >> match_mp_tac (GSYM (SIMP_RULE (srw_ss()) [PULL_FORALL, AND_IMP_INTRO] vec_lang_correct))
2062 >> rw []
2063    >- rw [sorted_get_accepts]
2064    >- metis_tac [mem_alphabet]
2065    >- (fs [good_table_def, alistTheory.alist_range_def]
2066         >> res_tac
2067         >> `n'' IN range state_map`
2068              by (simp_tac set_ss [range_def, frange_def] >> metis_tac[])
2069         >> `n'' IN count (LENGTH table)` by metis_tac [EXTENSION]
2070         >> metis_tac [IN_COUNT])
2071   >- metis_tac [mem_alphabet]
2072   >- (`normalize r IN fdom regexp_compare (singleton (normalize r) 0)`
2073          by (rw [fdom_def, singleton_def,lookup_def,EQ_IMP_THM]
2074               >> BasicProvers.EVERY_CASE_TAC
2075               >> fs [regexp_compare_id])
2076        >> `normalize r IN fdom regexp_compare state_map /\
2077            (lookup regexp_compare (normalize r) state_map =
2078             lookup regexp_compare (normalize r) (singleton (normalize r) 0))`
2079               by metis_tac [submap_def]
2080        >> rw [singleton_def,lookup_def,regexp_compare_id]
2081        >> `0n IN range state_map`
2082              by (simp_tac set_ss [range_def, frange_def]
2083                    >> Q.EXISTS_TAC `normalize r`
2084                    >> rw [singleton_def,lookup_def,EQ_IMP_THM]
2085                    >> CASE_TAC
2086                    >> fs [regexp_compare_id])
2087        >> `0n IN count (LENGTH table)` by metis_tac [EXTENSION]
2088        >> metis_tac [IN_COUNT])
2089QED
2090
2091Theorem Brzozowski_correct_alt :
2092 !r s.
2093    dom_Brz empty (singleton (normalize r) ())
2094            (1,singleton (normalize r) 0,[]) /\
2095    (!c. MEM c s ==> MEM (ORD c) ALPHABET)
2096   ==>
2097   (regexp_matcher r s = regexp_lang r s)
2098Proof
2099 rw []
2100  >> `Brz_lang r s = regexp_lang r s`
2101      by metis_tac [regexp_lang_normalize,Brzozowski_correct]
2102  >> pop_assum (SUBST_ALL_TAC o SYM)
2103  >> rw [Brz_lang_def, regexp_matcher_def, LET_THM,apply_def,fapply_def]
2104QED
2105
2106Theorem Brzozowski_partial_eval :
2107 !r state_numbering delta accepts deltaV acceptsV start_state.
2108    (compile_regexp r = (state_numbering,delta,accepts)) /\
2109    (deltaV = fromList (MAP fromList delta)) /\
2110    (acceptsV = fromList accepts) /\
2111    (lookup regexp_compare (normalize r) state_numbering = SOME start_state) /\
2112    dom_Brz empty (singleton (normalize r) ())
2113                  (1,singleton (normalize r) 0,[])
2114    ==>
2115    !s. EVERY (\c. MEM (ORD c) ALPHABET) s
2116    ==>
2117    (exec_dfa acceptsV deltaV start_state s = regexp_lang r s)
2118Proof
2119 rw [EVERY_MEM]
2120  >> `regexp_matcher r s = regexp_lang r s`
2121      by metis_tac [Brzozowski_correct_alt]
2122  >> pop_assum (SUBST_ALL_TAC o SYM)
2123  >> rw [regexp_matcher_def, LET_THM]
2124QED
2125
2126
2127Theorem Brzozowski_partial_eval_conseq :
2128 !r state_numbering delta accepts deltaV acceptsV start_state.
2129    (compile_regexp r = (state_numbering,delta,accepts)) /\
2130    (deltaV = fromList (MAP fromList delta)) /\
2131    (acceptsV = fromList accepts) /\
2132    (lookup regexp_compare (normalize r) state_numbering = SOME start_state) /\
2133    dom_Brz_alt empty (singleton (normalize r) ())
2134    ==>
2135    !s. EVERY (\c. ORD c < alphabet_size) s
2136    ==>
2137    (exec_dfa acceptsV deltaV start_state s = regexp_lang r s)
2138Proof
2139 metis_tac[Brzozowski_partial_eval,all_ord_string,
2140           dom_Brz_alt_equal,all_ord_string]
2141QED
2142
2143(*---------------------------------------------------------------------------*)
2144(* Eliminate check that all chars in string are in alphabet. This can be     *)
2145(* be done when alphabet = [0..255]                                          *)
2146(*---------------------------------------------------------------------------*)
2147
2148Theorem Brzozowski_partial_eval_256 =
2149  if Regexp_Type.alphabet_size = 256 then
2150   SIMP_RULE list_ss [ORD_BOUND,alphabet_size_def] Brzozowski_partial_eval_conseq
2151  else TRUTH;
2152
2153
2154Theorem regexp_matcher_correct:
2155   dom_Brz_alt empty (singleton (normalize r) ()) ==>
2156               (regexp_matcher r s <=> s IN regexp_lang r)
2157Proof
2158  rw[regexp_matcher_def]
2159  \\ pairarg_tac \\ fs[]
2160  \\ imp_res_tac compile_regexp_good_vec
2161  \\ rfs[dom_Brz_alt_equal,fdom_def]
2162  \\ imp_res_tac Brzozowski_partial_eval_256
2163  \\ simp[IN_DEF]
2164QED
2165
2166(* val _ = EmitTeX.tex_theory"-"; *)
2167
2168val _ = export_theory();
2169