1open HolKernel Parse boolLib bossLib lcsymtacs numLib;
2open optionTheory pairTheory relationTheory arithmeticTheory
3     pred_setTheory bagTheory containerTheory
4     listTheory rich_listTheory stringTheory sortingTheory mergesortTheory
5     comparisonTheory balanced_mapTheory eq_cmp_bmapTheory osetTheory
6     finite_mapTheory vec_mapTheory charsetTheory regexpTheory
7;
8
9val _ = numLib.prefer_num();
10
11fun pat_elim q = Q.PAT_X_ASSUM q (K ALL_TAC);
12
13val SET_EQ_THM = Q.prove
14(`!s1 s2. (s1 = s2) = !x. s1 x = s2 x`,
15 METIS_TAC [EXTENSION,IN_DEF]);
16
17val LENGTH_NIL_SYM =
18   GEN_ALL (CONV_RULE (LHS_CONV SYM_CONV) (SPEC_ALL LENGTH_NIL));
19
20val list_ss = list_ss ++ rewrites [LENGTH_NIL, LENGTH_NIL_SYM];
21
22val set_ss = list_ss ++ pred_setLib.PRED_SET_ss ++ rewrites [SET_EQ_THM,IN_DEF]
23
24val _ = new_theory "regexp_compiler";
25
26val comparison_distinct = TypeBase.distinct_of ``:ordering``
27
28val eq_cmp_regexp_compare = Q.prove
29(`eq_cmp regexp_compare`,
30 metis_tac [eq_cmp_def, regexp_compare_good,regexp_compare_eq]);
31
32(*---------------------------------------------------------------------------*)
33(* Output of the compiler is in term of vectors.                             *)
34(*---------------------------------------------------------------------------*)
35
36val _ = Hol_datatype `vector = Vector of 'a list`;
37
38val fromList_def = Define `fromList l = Vector l`;
39val sub_def      = Define `sub (Vector l) n = EL n l`;
40val length_def   = Define `length (Vector l) = LENGTH l`;
41
42val fromList_Vector = save_thm
43("fromList_Vector",
44 SIMP_RULE list_ss [GSYM FUN_EQ_THM] fromList_def);
45
46(*---------------------------------------------------------------------------*)
47(* Manipulate the set of seen states                                         *)
48(*---------------------------------------------------------------------------*)
49
50val _ = Parse.type_abbrev("regexp_set", ``:(regexp,unit)balanced_map``);
51
52val insert_regexp_def =
53 Define
54   `insert_regexp r seen = balanced_map$insert regexp_compare r () seen`;
55
56val mem_regexp_def =
57 Define
58   `mem_regexp r seen = balanced_map$member regexp_compare r seen`;
59
60(*---------------------------------------------------------------------------*)
61(* Transitions out of a state (regexp).                                      *)
62(*---------------------------------------------------------------------------*)
63
64val transitions_def =
65 Define
66   `transitions r = MAP (\c. (c,smart_deriv c r)) ALPHABET`;
67
68val extend_states_def =
69 Define
70   `(extend_states next_state state_map trans [] = (next_state,state_map,trans)) /\
71    (extend_states next_state state_map trans ((c,r')::t) =
72       case balanced_map$lookup regexp_compare r' state_map
73        of SOME n => extend_states next_state state_map ((c,n)::trans) t
74         | NONE   => extend_states (next_state + 1n)
75                        (balanced_map$insert regexp_compare r' next_state state_map)
76                        ((c,next_state)::trans)
77                        t)`;
78
79val extend_states_ind = fetch "-" "extend_states_ind";
80
81val build_table_def =
82 Define
83   `build_table arcs r (next_state,state_map,table) =
84     let (next_state,state_map,trans) = extend_states next_state state_map [] arcs
85     in case balanced_map$lookup regexp_compare r state_map
86         of SOME n => (next_state, state_map, (n,trans)::table)
87          | NONE   => (next_state + 1n,
88                       balanced_map$insert regexp_compare r next_state state_map,
89                       (next_state,trans)::table)`;
90
91(*---------------------------------------------------------------------------*)
92(* The core regexp compiler. The seen argument holds the already-seen        *)
93(* regexps, which map to states in the final DFA. The n argument is a        *)
94(* step-index used to ensure that the function terminates.                   *)
95(*---------------------------------------------------------------------------*)
96
97val Brz_def =
98 Define
99  `Brz seen worklist acc n =
100     if n <= 0n then NONE else
101     case worklist of
102        [] => SOME (seen,acc)
103     | r::t =>
104         if mem_regexp r seen then Brz seen t acc (n-1) else
105         let arcs = transitions r
106         in Brz (insert_regexp r seen)
107                (remove_dups (MAP SND arcs) ++ t)
108                (build_table arcs r acc)
109                (n-1)`;
110
111val MAXNUM_32_def = Define `MAXNUM_32:num = 2147483647`;
112
113(*---------------------------------------------------------------------------*)
114(* Build Greve-style Brz function                                            *)
115(*---------------------------------------------------------------------------*)
116
117val MAX_LE_THM = Q.prove
118(`!m n. m <= MAX m n /\ n <= MAX m n`,
119 RW_TAC arith_ss [MAX_DEF]);
120
121val IS_SOME_EXISTS = Q.prove
122(`!x. IS_SOME x = ?y. x = SOME y`,
123 Cases THEN METIS_TAC [optionTheory.IS_SOME_DEF]);
124
125(*---------------------------------------------------------------------------*)
126(* Domain of the function.                                                   *)
127(*---------------------------------------------------------------------------*)
128
129val dom_Brz_def =
130  Define
131   `dom_Brz seen worklist acc = ?d. IS_SOME(Brz seen worklist acc d)`;
132
133(*---------------------------------------------------------------------------*)
134(* Create measure function rdepth. Uses following code copied from           *)
135(*                                                                           *)
136(*       HOLDIR/src/pfl/defchoose                                            *)
137(*                                                                           *)
138(*---------------------------------------------------------------------------*)
139
140(*---------------------------------------------------------------------------*)
141(*  MINCHOOSE (sname, cname,``!x1 ... xn. ?z. M[x1,...,xn,z]``)              *)
142(*   returns                                                                 *)
143(*                                                                           *)
144(*    |- !x1...xn z. M[x1,...,xn,z] ==>                                      *)
145(*                   M[x1,...,xn,cname x1...xn] /\                           *)
146(*                   !m. M[x1,...,xn,m] ==> cname x1...xn <= m               *)
147(*                                                                           *)
148(* where cname in the theorem is a constant. This theorem is stored in the   *)
149(* current theory under sname. Thus this rule is a form of the               *)
150(* well-ordering property.                                                   *)
151(*---------------------------------------------------------------------------*)
152
153val WOP_THM = Q.prove
154(`!P. (?n. P n) ==> ?min. P min /\ !k. P k ==> min <= k`,
155 METIS_TAC [arithmeticTheory.WOP,DECIDE ``~(m<n) ==> n<=m``]);
156
157fun MINCHOOSE (store_name,const_name,tm) =
158 let val (V,body) = strip_forall tm
159     val P = snd(dest_comb body)
160     val wop_thm = BETA_RULE(SPEC P WOP_THM)
161     val min_term = snd(dest_imp (concl wop_thm))
162     val min_term_pred = snd(dest_comb min_term)
163     val th1 = BETA_RULE(GSYM (ISPECL [body,min_term_pred] RIGHT_EXISTS_IMP_THM))
164     val th2 = EQ_MP th1 wop_thm
165     val th3 = GENL V th2
166     val th4 = Ho_Rewrite.REWRITE_RULE[SKOLEM_THM] th3
167     val th5 = Ho_Rewrite.REWRITE_RULE[GSYM LEFT_FORALL_IMP_THM] th4
168 in
169  new_specification(store_name, [const_name], th5)
170 end
171 handle e => raise wrap_exn "" "MINCHOOSE" e;
172
173val rdepth_thm =
174   MINCHOOSE ("rdepth_thm", "rdepth",
175    ``!seen worklist acc. ?d. IS_SOME(Brz seen worklist acc d)``)
176
177(*---------------------------------------------------------------------------*)
178(* Define Brzozo                                                             *)
179(*---------------------------------------------------------------------------*)
180
181val Brzozo_def =
182 Define
183   `Brzozo seen worklist acc =
184      THE (Brz seen worklist acc (rdepth seen worklist acc))`;
185
186
187(*---------------------------------------------------------------------------*)
188(* Lemmas about Brz and definedness                                          *)
189(*---------------------------------------------------------------------------*)
190
191val IS_SOME_Brz = Q.prove
192(`!d seen worklist acc.
193      IS_SOME (Brz seen worklist acc d)
194         ==>
195        d <> 0`,
196 Cases >> rw[Once Brz_def])
197
198val Brz_SOME = Q.prove
199(`!d seen worklist acc res.
200   (Brz seen worklist acc d = SOME res)
201   ==> d <> 0`,
202 METIS_TAC [IS_SOME_Brz,IS_SOME_EXISTS]);
203
204val Brz_dlem = Q.prove
205(`!d seen worklist acc.
206  IS_SOME (Brz seen worklist acc d)
207   ==>
208  (Brz seen worklist acc d = Brz seen worklist acc (SUC d))`,
209 Ho_Rewrite.REWRITE_TAC [IS_SOME_EXISTS,GSYM LEFT_FORALL_IMP_THM]
210 >> Induct
211    >- metis_tac [Brz_SOME]
212    >- (rw[] >> rw[Once Brz_def,LET_THM]
213         >> pop_assum mp_tac
214         >> BasicProvers.EVERY_CASE_TAC
215            >- rw [Brz_def]
216            >- (rw [Once Brz_def] >> metis_tac [])
217            >- (DISCH_THEN (mp_tac o SIMP_RULE list_ss [Once Brz_def])
218                 >> rw[LET_THM]
219                 >> metis_tac[]))
220);
221
222val Brz_monotone = Q.prove
223(`!d1 d2 seen worklist acc.
224      IS_SOME(Brz seen worklist acc d1) /\ d1 <= d2
225       ==> (Brz seen worklist acc d1 = Brz seen worklist acc d2)`,
226 RW_TAC arith_ss [LESS_EQ_EXISTS] THEN
227 Induct_on `p` THEN METIS_TAC [ADD_CLAUSES,Brz_dlem]);
228
229val Brz_norm = Q.prove
230(`!d seen worklist acc.
231   IS_SOME(Brz seen worklist acc d)
232    ==>
233   (Brz seen worklist acc d = Brz seen worklist acc (rdepth seen worklist acc))`,
234  METIS_TAC [Brz_monotone,rdepth_thm]);
235
236val Brz_determ = Q.store_thm("Brz_determ",
237 `!d1 d2 seen worklist acc.
238    IS_SOME(Brz seen worklist acc d1) /\ IS_SOME(Brz seen worklist acc d2)
239       ==> (Brz seen worklist acc d1 = Brz seen worklist acc d2)`,
240  METIS_TAC [Brz_norm]);
241
242(*---------------------------------------------------------------------------*)
243(* Derive eqns for dom_Brz                                                   *)
244(*---------------------------------------------------------------------------*)
245
246val lem = Q.prove
247(`!seen worklist acc.
248   (worklist = []) ==> IS_SOME (Brz seen worklist acc 1)`,
249 RW_TAC arith_ss [Once Brz_def]);
250
251val dom_base_case = Q.prove
252(`!seen worklist acc.
253   (worklist = []) ==> dom_Brz seen worklist acc`,
254 METIS_TAC [dom_Brz_def, lem]);
255
256val step2_lem1a = Q.prove
257(`!seen worklist acc r t.
258   (worklist = r::t) /\ mem_regexp r seen /\ dom_Brz seen worklist acc
259    ==> dom_Brz seen t acc`,
260 RW_TAC std_ss [dom_Brz_def] THEN
261 `d<>0` by METIS_TAC [IS_SOME_Brz] THEN
262 Q.EXISTS_TAC `d-1` THEN
263 Q.PAT_X_ASSUM `IS_SOME arg` (MP_TAC o ONCE_REWRITE_RULE [Brz_def]) THEN
264 CASE_TAC THEN RW_TAC arith_ss [LET_THM]
265 THEN METIS_TAC[]);
266
267val step2_lem1b = Q.prove
268(`!seen worklist acc r t arcs.
269   (worklist = r::t) /\ ~mem_regexp r seen /\
270   (arcs = transitions r) /\  dom_Brz seen worklist acc
271    ==> dom_Brz (insert_regexp r seen)
272                (remove_dups (MAP SND arcs) ++ t)
273                (build_table arcs r acc)`,
274 RW_TAC std_ss [dom_Brz_def] THEN
275 `d<>0` by METIS_TAC [IS_SOME_Brz] THEN
276 Q.EXISTS_TAC `d-1` THEN
277 Q.PAT_X_ASSUM `IS_SOME arg` (MP_TAC o ONCE_REWRITE_RULE [Brz_def]) THEN
278 CASE_TAC THEN RW_TAC arith_ss [LET_THM]
279 THEN METIS_TAC[]);
280
281val step2_lem2a = Q.prove
282(`!seen worklist acc r t.
283  (worklist = r::t) /\ mem_regexp r seen /\ dom_Brz seen t acc
284   ==> dom_Brz seen worklist acc`,
285 rw_tac std_ss [dom_Brz_def]
286   >> Q.EXISTS_TAC `SUC d`
287   >> rw [Once Brz_def]);
288
289val step2_lem2b = Q.prove
290(`!seen worklist acc r t arcs.
291  (worklist = r::t) /\ (arcs = transitions r) /\
292  ~mem_regexp r seen /\
293  dom_Brz (insert_regexp r seen)
294          (remove_dups (MAP SND arcs) ++ t)
295          (build_table arcs r acc)
296   ==> dom_Brz seen worklist acc`,
297 rw_tac std_ss [dom_Brz_def]
298   >> Q.EXISTS_TAC `SUC d`
299   >> rw [Once Brz_def,LET_THM]);
300
301(*---------------------------------------------------------------------------*)
302(* Equational characterization of dom.                                       *)
303(*---------------------------------------------------------------------------*)
304
305val dom_Brz_eqns = Q.store_thm
306("dom_Brz_eqns",
307 `(dom_Brz seen [] acc = T) /\
308  (dom_Brz seen (r::t) acc =
309    if mem_regexp r seen
310       then dom_Brz seen t acc
311       else let arcs = transitions r
312            in dom_Brz (insert_regexp r seen)
313                       (remove_dups (MAP SND arcs) ++ t)
314                       (build_table arcs r acc))`,
315 CONJ_TAC
316  >- metis_tac [dom_base_case]
317  >- metis_tac [step2_lem1a,step2_lem1b,step2_lem2a,step2_lem2b,LET_THM]);
318
319(*---------------------------------------------------------------------------*)
320(* Optimization: dom_Brz doesn't depend on its "acc" argument, so can be     *)
321(* replaced by a version that doesn't have it.                               *)
322(*---------------------------------------------------------------------------*)
323
324val dom_Brz_alt_def =
325 Define
326  `dom_Brz_alt seen worklist = dom_Brz seen worklist ARB`;
327
328val acc_irrelevant = Q.prove
329(`!n seen worklist acc acc1.
330     IS_SOME (Brz seen worklist acc n) ==>
331     IS_SOME (Brz seen worklist acc1 n)`,
332 Induct
333   >- rw [Once Brz_def]
334   >- (ONCE_REWRITE_TAC [Brz_def]
335        >> rw_tac list_ss [LET_THM]
336        >> BasicProvers.EVERY_CASE_TAC
337           >- rw []
338           >- metis_tac[]
339           >- metis_tac[]));
340
341val dom_Brz_alt_equal = Q.store_thm
342("dom_Brz_alt_equal",
343 `!seen worklist acc. dom_Brz seen worklist acc = dom_Brz_alt seen worklist`,
344 rw [dom_Brz_def, dom_Brz_alt_def] >> metis_tac [acc_irrelevant]);
345
346
347(*---------------------------------------------------------------------------*)
348(*    |- dom_Brz_alt seen [] /\                                              *)
349(*       (dom_Brz_alt seen (r::t) =                                          *)
350(*         if mem_regexp r seen then                                         *)
351(*            dom_Brz_alt seen t                                             *)
352(*         else                                                              *)
353(*            (let arcs = transitions r                                      *)
354(*             in dom_Brz_alt (insert_regexp r seen)                         *)
355(*                            (remove_dups (MAP SND arcs) ++ t)))            *)
356(*---------------------------------------------------------------------------*)
357
358val dom_Brz_alt_eqns = save_thm
359("dom_Brz_alt_eqns",
360 SIMP_RULE bool_ss [dom_Brz_alt_equal] dom_Brz_eqns);
361
362(*---------------------------------------------------------------------------*)
363(* Recursion equations for Brzozo                                            *)
364(*---------------------------------------------------------------------------*)
365
366val Brzozo_base = Q.prove
367(`!seen worklist acc.
368    dom_Brz seen worklist acc /\ (worklist = [])
369    ==>
370   (Brzozo seen worklist acc = (seen,acc))`,
371 RW_TAC std_ss [Brzozo_def,dom_Brz_def]
372  >> `rdepth seen [] acc <> 0`
373       by METIS_TAC [IS_SOME_Brz,rdepth_thm]
374  >> RW_TAC arith_ss [Once Brz_def]);
375
376val Brzozo_rec1 = Q.prove
377(`!seen worklist accr r t.
378    (worklist = r::t) /\
379    dom_Brz seen worklist acc /\
380    mem_regexp r seen
381    ==>
382    (Brzozo seen worklist acc = Brzozo seen t acc)`,
383 RW_TAC std_ss [Brzozo_def,dom_Brz_def]
384  >> `d <> 0` by METIS_TAC [IS_SOME_Brz]
385  >> `Brz seen (r::t) acc d = Brz seen t acc (d-1)`
386      by rw_tac list_ss [Once Brz_def]
387  >> METIS_TAC [IS_SOME_EXISTS,NOT_SOME_NONE,THE_DEF,Brz_norm]);
388
389val Brzozo_rec2 = Q.prove
390(`!seen worklist accr r t.
391    (worklist = r::t) /\
392    dom_Brz seen worklist acc /\
393    ~mem_regexp r seen
394    ==>
395    (Brzozo seen worklist acc =
396       let arcs = transitions r
397       in Brzozo (insert_regexp r seen)
398                 (remove_dups (MAP SND arcs) ++ t)
399                 (build_table arcs r acc))`,
400 rw_tac std_ss [Brzozo_def,dom_Brz_def,LET_THM]
401  >> `d <> 0` by METIS_TAC [IS_SOME_Brz]
402  >> `Brz seen (r::t) acc d =
403      let arcs = transitions r
404      in Brz (insert_regexp r seen)
405          (remove_dups (MAP SND arcs) ++ t)
406          (build_table arcs r acc) (d-1)`
407      by rw_tac list_ss [Once Brz_def,LET_THM]
408  >> METIS_TAC [IS_SOME_EXISTS,NOT_SOME_NONE,THE_DEF,Brz_norm,LET_THM]);
409
410(*---------------------------------------------------------------------------*)
411(* Equational characterization of Brzozo.                                    *)
412(*---------------------------------------------------------------------------*)
413
414val Brzozo_eqns = Q.prove
415(`!seen worklist acc.
416   dom_Brz seen worklist acc
417  ==>
418   (Brzozo seen worklist acc =
419     case worklist of
420        []  => (seen,acc)
421     | r::t =>
422        if mem_regexp r seen then
423           Brzozo seen t acc
424        else
425          let arcs = transitions r
426          in
427           Brzozo (insert_regexp r seen)
428                  (remove_dups (MAP SND arcs) ++ t)
429                  (build_table arcs r acc))`,
430 Cases_on `worklist` >> rpt gen_tac >> CASE_TAC >> rw[LET_THM]
431  >- metis_tac [Brzozo_base]
432  >- metis_tac [Brzozo_rec1]
433  >- metis_tac [Brzozo_rec2]
434);
435
436(*---------------------------------------------------------------------------*)
437(* Now prove induction theorem. This is based on using rdepth as a measure   *)
438(* on the recursion. Thus we first have some lemmas about how rdepth         *)
439(* decreases in recursive calls.                                             *)
440(*---------------------------------------------------------------------------*)
441
442val step3a_lt = Q.prove
443(`!seen worklist acc r t.
444    (worklist = r::t) /\ mem_regexp r seen /\
445    dom_Brz seen worklist acc
446    ==> rdepth seen t acc  < rdepth seen worklist acc`,
447 rw_tac std_ss [dom_Brz_def] THEN IMP_RES_TAC rdepth_thm THEN
448   `rdepth seen (r::t) acc <> 0` by METIS_TAC [IS_SOME_Brz] THEN
449   `rdepth seen (r::t) acc - 1 < rdepth seen (r::t) acc` by DECIDE_TAC THEN
450   `IS_SOME (Brz seen t acc (rdepth seen (r::t) acc - 1))`
451     by (Q.PAT_X_ASSUM `IS_SOME (Brz seen (r::t) acc (rdepth seen (r::t) acc))`
452          (mp_tac o SIMP_RULE arith_ss [Once Brz_def])
453        >> CASE_TAC >> rw[LET_THM]
454           >- metis_tac[]
455           >- metis_tac[])
456    >> `rdepth seen t acc <= rdepth seen (r::t) acc - 1`
457        by metis_tac [rdepth_thm]
458    >> decide_tac);
459
460val step3b_lt = Q.prove
461(`!seen worklist acc r t arcs.
462    (worklist = r::t) /\ ~mem_regexp r seen /\
463    (arcs = transitions r) /\ dom_Brz seen worklist acc
464    ==> rdepth (insert_regexp r seen)
465               (remove_dups (MAP SND arcs) ++ t)
466               (build_table arcs r acc)  < rdepth seen worklist acc`,
467 rw_tac std_ss [dom_Brz_def] THEN IMP_RES_TAC rdepth_thm
468  >> `rdepth seen (r::t) acc <> 0` by METIS_TAC [IS_SOME_Brz]
469  >> `rdepth seen (r::t) acc - 1 < rdepth seen (r::t) acc` by DECIDE_TAC
470  >> `IS_SOME (Brz (insert_regexp r seen)
471                   (remove_dups (MAP SND (transitions r)) ++ t)
472                   (build_table (transitions r) r acc)
473                   (rdepth seen (r::t) acc - 1))`
474     by (Q.PAT_X_ASSUM `IS_SOME (Brz seen (r::t) acc (rdepth seen (r::t) acc))`
475          (mp_tac o SIMP_RULE arith_ss [Once Brz_def])
476        >> CASE_TAC >> rw[LET_THM]
477           >- metis_tac[]
478           >- metis_tac[])
479    >> `rdepth (insert_regexp r seen)
480               (remove_dups (MAP SND (transitions r)) ++ t)
481               (build_table (transitions r) r acc) <= rdepth seen (r::t) acc - 1`
482        by metis_tac [rdepth_thm]
483    >> decide_tac);
484
485(*---------------------------------------------------------------------------*)
486(* Induction for Brzozo is obtained by instantiating the WF induction        *)
487(* theorem with the rdepth measure and then simplifying.                     *)
488(*---------------------------------------------------------------------------*)
489
490val ind0 = MATCH_MP relationTheory.WF_INDUCTION_THM
491                    (Q.ISPEC `\(seen,worklist,acc). rdepth seen worklist acc`
492                             prim_recTheory.WF_measure);
493val ind1 = SIMP_RULE std_ss [prim_recTheory.measure_thm] ind0;
494val ind2 = SIMP_RULE std_ss [pairTheory.FORALL_PROD]
495      (Q.ISPEC `\(seen,worklist,acc).
496                    dom_Brz seen worklist acc ==> P seen worklist acc` ind1);
497val ind3 = Q.prove
498(`!P. (!seen worklist acc.
499         (!a b c. rdepth a b c < rdepth seen worklist acc
500                  ==> dom_Brz a b c
501                  ==> P a b c)
502         ==> dom_Brz seen worklist acc ==> P seen worklist acc)
503  ==>
504  !seen worklist acc. dom_Brz seen worklist acc ==> P seen worklist acc`,
505rpt strip_tac
506 >> pop_assum mp_tac
507 >> Cases_on `acc`
508 >> Cases_on `r`
509 >> Q.SPEC_TAC (`r'`,`w`)
510 >> Q.SPEC_TAC (`q'`,`v`)
511 >> Q.SPEC_TAC (`q`,`u`)
512 >> Q.ID_SPEC_TAC `worklist`
513 >> Q.ID_SPEC_TAC `seen`
514 >> match_mp_tac ind2
515 >> fs [FORALL_PROD]);
516
517val Brzozowski_ind = Q.store_thm
518("Brzozowski_ind",
519 `!P.
520   (!seen worklist acc.
521       dom_Brz seen worklist acc /\
522       (!r t. (worklist = r::t) /\ mem_regexp r seen ==> P seen t acc) /\
523       (!r t arcs. (arcs = transitions r) /\ (worklist = r::t) /\ ~mem_regexp r seen
524                   ==> P (insert_regexp r seen)
525                         (remove_dups (MAP SND arcs) ++ t)
526                         (build_table arcs r acc))
527         ==> P seen worklist acc)
528  ==> !seen worklist acc. dom_Brz seen worklist acc ==> P seen worklist acc`,
529  gen_tac >> strip_tac
530  >> HO_MATCH_MP_TAC ind3
531  >> rw[]
532  >> metis_tac [step3a_lt,step3b_lt,dom_Brz_eqns]);
533
534(*---------------------------------------------------------------------------*)
535(* Efficient executable version of Brzozo                                    *)
536(*---------------------------------------------------------------------------*)
537
538val exec_Brz_def =
539 Define
540 `exec_Brz seen worklist acc d =
541    if d=0n then
542       (if dom_Brz seen worklist acc then Brzozo seen worklist acc else ARB)
543    else
544      case worklist
545       of [] => (seen,acc)
546       | r::t =>
547         if mem_regexp r seen then
548             exec_Brz seen t acc (d ��� 1)
549           else
550             let arcs = transitions r
551             in exec_Brz (insert_regexp r seen)
552                         (remove_dups (MAP SND arcs) ++ t)
553                         (build_table arcs r acc) (d ��� 1)`;
554
555val exec_Brz_equals_Brzozo = Q.prove
556(`!d seen worklist acc.
557    dom_Brz seen worklist acc
558   ==>
559    (exec_Brz seen worklist acc d = Brzozo seen worklist acc)`,
560 Induct
561  >> rw_tac std_ss [Once exec_Brz_def,LET_THM]
562  >> pop_assum mp_tac
563  >> CASE_TAC
564    >- rw [Brzozo_eqns,dom_Brz_eqns]
565    >- rw [Brzozo_eqns,dom_Brz_eqns,LET_THM]);
566
567val Brzozowski_def =
568 Define
569   `Brzozowski seen worklist acc =
570      if dom_Brz seen worklist acc then
571         Brzozo seen worklist acc
572      else
573         exec_Brz seen worklist acc MAXNUM_32`;
574
575(*---------------------------------------------------------------------------*)
576(* Theorem showing that Brzozowski can be executed w.o. termination proof.   *)
577(* In order to evaluate Brzozowski, we can dispatch to exec_Brz.             *)
578(*---------------------------------------------------------------------------*)
579
580val Brzozowski_exec_Brz = Q.store_thm
581("Brzozowski_exec_Brz",
582 `Brzozowski seen worklist acc = exec_Brz seen worklist acc MAXNUM_32`,
583 rw_tac std_ss [Brzozowski_def,exec_Brz_equals_Brzozo]);
584
585(*---------------------------------------------------------------------------*)
586(* In order to reason about Brzozowski, we use the following theorem.        *)
587(*---------------------------------------------------------------------------*)
588
589val Brzozowski_eqns = Q.store_thm
590("Brzozowski_eqns",
591 `dom_Brz seen worklist acc ==>
592   (Brzozowski seen worklist acc =
593      case worklist
594       of [] => (seen,acc)
595       | r::t =>
596         if mem_regexp r seen then
597             Brzozowski seen t acc
598           else
599             let arcs = transitions r
600             in Brzozowski (insert_regexp r seen)
601                           (remove_dups (MAP SND arcs) ++ t)
602                           (build_table arcs r acc))`,
603 rw_tac std_ss [Brzozowski_def]
604  >> CASE_TAC
605     >- rw [Brzozo_eqns]
606     >- (rw [Brzozo_eqns,LET_THM]
607          >> metis_tac [exec_Brz_equals_Brzozo,dom_Brz_eqns]));
608
609
610(*---------------------------------------------------------------------------*)
611(* Return to definition of compiler                                          *)
612(*---------------------------------------------------------------------------*)
613
614val get_accepts_def =
615 Define
616  `get_accepts fmap =
617     mergesort (\a b:num. a <= b)
618       (balanced_map$foldrWithKey
619         (\r (n:num) nlist. if nullable r then n::nlist else nlist)
620         [] fmap)`;
621
622val accepts_to_vector_def =
623 Define
624  `accepts_to_vector accept_states max_state =
625     alist_to_vec (MAP (\x. (x,T)) accept_states) F max_state max_state`;
626
627val table_to_vectors_def =
628 Define
629  `table_to_vectors table =
630     MAP (\(k:num,table2).
631             alist_to_vec (mergesort (inv_image (\a b:num. a <= b) FST)
632                                 (MAP (\(c,n). (c, SOME n)) table2))
633                          NONE alphabet_size alphabet_size)
634         (mergesort (inv_image (\a b:num. a <= b) FST) table)`;
635
636val compile_regexp_def =
637 Define
638   `compile_regexp r =
639      let r' = normalize r in
640      let (states,last_state,state_numbering,table)
641         = Brzozowski balanced_map$empty [r']
642                      (1n, balanced_map$singleton r' 0n, []) in
643      let delta_vecs = table_to_vectors table in
644      let accepts_vec = accepts_to_vector (get_accepts state_numbering) last_state
645      in
646         (state_numbering,
647          delta_vecs,
648          accepts_vec)`;
649
650val exec_dfa_def =
651 Define
652  `exec_dfa finals table n s =
653   case s of
654    | "" => sub finals n
655    | c::t =>
656      case sub (sub table n) (ORD c) of
657       | NONE => F
658       | SOME k => exec_dfa finals table k t`;
659
660val exec_dfa_thm = Q.prove
661(`(exec_dfa finals table n [] = sub finals n) /\
662  (exec_dfa finals table n (c::t) =
663    case sub (sub table n) (ORD c) of
664     | NONE => F
665     | SOME k => exec_dfa finals table k t)`,
666CONJ_TAC
667 >- rw_tac list_ss [exec_dfa_def]
668 >- rw_tac list_ss [SimpLHS, Once exec_dfa_def])
669
670(*
671val exec_dfa_def =
672  Define
673   `(exec_dfa finals table n [] = EL n finals) /\
674    (exec_dfa finals table n (c::t) =
675        case EL (ORD c) (EL n table) of
676         | NONE => F
677         | SOME k => exec_dfa finals table k t)`;
678*)
679
680
681(*---------------------------------------------------------------------------*)
682(* Returns a function of type :string -> bool                                *)
683(*---------------------------------------------------------------------------*)
684
685val regexp_matcher_def =
686 Define
687  `regexp_matcher r =
688    let (state_numbering,delta,accepts) = compile_regexp r in
689    let start_state = THE (balanced_map$lookup regexp_compare
690                               (normalize r) state_numbering) in
691    let acceptsV = fromList accepts in
692    let deltaV = fromList (MAP fromList delta)
693    in
694      exec_dfa acceptsV deltaV start_state`;
695
696(*
697val regexp_matcher_def =
698 Define
699  `regexp_matcher r =
700    let (state_numbering,delta,accepts) = compile_regexp r in
701    let start_state_opt = balanced_map$lookup regexp_compare (normalize r) state_numbering
702    in
703      exec_dfa accepts delta (THE start_state_opt)`;
704*)
705
706(*---------------------------------------------------------------------------*)
707(* get_accepts properties                                                    *)
708(*---------------------------------------------------------------------------*)
709
710val sorted_get_accepts = Q.store_thm
711("sorted_get_accepts",
712 `!fmap. SORTED $<= (get_accepts fmap)`,
713 RW_TAC set_ss [get_accepts_def] THEN
714 CONV_TAC (DEPTH_CONV ETA_CONV) THEN
715 MATCH_MP_TAC mergesort_sorted THEN
716 RW_TAC arith_ss [transitive_def, total_def]);
717
718val lookup_bin = Q.prove
719(`!fmap fmap' n k v x.
720      (lookup cmp r (Bin n k v fmap fmap') = SOME x)
721      <=>
722      ((cmp r k = Equal) /\ (v = x)) \/
723      ((cmp r k = Less) /\ (lookup cmp r fmap = SOME x)) \/
724      ((cmp r k = Greater) /\ (lookup cmp r fmap' = SOME x))`,
725 RW_TAC list_ss [lookup_def] THEN BasicProvers.CASE_TAC);
726
727val member_iff_lookup = Q.prove
728(`!fmap cmp x. member cmp x fmap <=> ?y. lookup cmp x fmap = SOME y`,
729 Induct
730   >> rw_tac set_ss [member_def, lookup_def]
731   >> BasicProvers.EVERY_CASE_TAC);
732
733val member_bin = Q.prove
734(`!fmap fmap' n k v.
735     member cmp r (Bin n k v fmap fmap')
736      <=>
737      ((cmp r k = Equal) \/
738      ((cmp r k = Less) /\ member cmp r fmap) \/
739      ((cmp r k = Greater) /\ member cmp r fmap'))`,
740 RW_TAC list_ss [member_def] THEN BasicProvers.CASE_TAC);
741
742val key_less_lookup = Q.prove
743(`!fmap cmp k1 k2 x.
744     invariant cmp fmap /\ good_cmp cmp /\
745     key_ordered cmp k1 fmap Less /\
746     (lookup cmp k2 fmap = SOME x)
747     ==>
748     (cmp k1 k2 = Less)`,
749Induct >> rw_tac list_ss [key_ordered_def, lookup_def, invariant_def]
750       >> every_case_tac
751          >- metis_tac []
752          >- metis_tac [good_cmp_def,comparison_distinct]
753          >- metis_tac []);
754
755val key_greater_lookup = Q.prove
756(`!fmap cmp k1 k2 x.
757     invariant cmp fmap /\ good_cmp cmp /\
758     key_ordered cmp k1 fmap Greater /\
759     (lookup cmp k2 fmap = SOME x)
760     ==>
761     (cmp k2 k1 = Less)`,
762Induct >> rw_tac list_ss [key_ordered_def, lookup_def, invariant_def]
763       >> every_case_tac
764          >- metis_tac []
765          >- metis_tac [good_cmp_def,comparison_distinct]
766          >- metis_tac []);
767
768val lemma = Q.prove
769(`!fmap x acc.
770    balanced_map$invariant regexp_compare fmap /\
771    MEM x (foldrWithKey
772               (��r n nlist. if nullable r then n::nlist else nlist)
773               acc fmap)
774     ==>
775    MEM x acc \/ ?r. nullable r /\ (lookup regexp_compare r fmap = SOME x)`,
776 Induct THEN RW_TAC std_ss []
777  >- full_simp_tac list_ss [foldrWithKey_def,lookup_def]
778  >- (simp_tac list_ss [lookup_bin]
779       >>
780      `invariant regexp_compare fmap /\
781       invariant regexp_compare fmap' /\
782       key_ordered regexp_compare k fmap Greater /\
783       key_ordered regexp_compare k fmap' Less`
784           by metis_tac [invariant_def]
785      >> qpat_x_assum `invariant cmp (Bin n k v fmap fmap')` (K ALL_TAC)
786      >> fs []
787      >> RULE_ASSUM_TAC (REWRITE_RULE [foldrWithKey_def])
788      >> RES_THEN MP_TAC >> BETA_TAC >> rw[]
789        >- metis_tac [regexp_compare_eq]
790        >- metis_tac [key_less_lookup,regexp_compare_good,good_cmp_thm]
791        >- metis_tac [key_greater_lookup,regexp_compare_good,good_cmp_thm]
792        >- metis_tac [key_less_lookup,regexp_compare_good,good_cmp_thm]
793        >- metis_tac [key_greater_lookup,regexp_compare_good,good_cmp_thm]));;
794
795val mem_acc = Q.prove
796(`!P fmap x acc.
797    MEM x acc
798     ==>
799     MEM x (foldrWithKey
800               (��r n nlist. if P r then n::nlist else nlist)
801               acc fmap)`,
802 gen_tac >> Induct >> RW_TAC std_ss [foldrWithKey_def] >> metis_tac [MEM]);
803
804val lemma1 = Q.prove
805(`!P fmap r x acc.
806    P r /\ (lookup regexp_compare r fmap = SOME x)
807     ==>
808     MEM x (foldrWithKey (��r n nlist. if P r then n::nlist else nlist) acc fmap)`,
809 gen_tac >> Induct
810 >- rw_tac list_ss [lookup_def,foldrWithKey_def]
811 >- (rw_tac list_ss [lookup_bin,regexp_compare_eq,foldrWithKey_def]
812      >> metis_tac [mem_acc,MEM]));
813
814val mem_get_accepts = Q.store_thm
815("mem_get_accepts",
816 `!bmap x.
817    invariant regexp_compare bmap ==>
818    (MEM x (get_accepts bmap)
819       <=>
820     ?r. nullable r /\ (lookup regexp_compare r bmap = SOME x))`,
821 rw_tac list_ss [get_accepts_def,mergesort_mem] >> metis_tac [lemma,lemma1,MEM]);
822
823
824(*---------------------------------------------------------------------------*)
825(* Correctness of regexp compiler                                            *)
826(*---------------------------------------------------------------------------*)
827
828val fmap_inj_def =
829 Define
830  `fmap_inj cmp bmap =
831     !x y. x IN fdom cmp bmap /\ (lookup cmp x bmap = lookup cmp y bmap)
832            ==> (cmp x y = Equal)`;
833
834val extend_states_inv_def =
835 Define
836  `extend_states_inv next_state state_map table =
837     (invariant regexp_compare state_map /\
838      fmap_inj regexp_compare state_map /\
839      (frange regexp_compare state_map = count next_state) /\
840      (!n. MEM n (MAP SND table) ��� n < next_state))`;
841
842val extend_states_inv = Q.prove (
843`!next_state state_map table states next_state' state_map' table'.
844  (extend_states next_state state_map table states = (next_state',state_map',table')) ���
845  extend_states_inv next_state state_map table
846  ���
847  extend_states_inv next_state' state_map' table'`,
848 ho_match_mp_tac extend_states_ind
849  >> rw [extend_states_def]
850  >> BasicProvers.EVERY_CASE_TAC
851  >> fs []
852  >> FIRST_X_ASSUM match_mp_tac
853  >> rw []
854  >> fs [extend_states_inv_def, frange_def, MEM_MAP,EXTENSION,GSYM LEFT_FORALL_IMP_THM]
855  >> `good_cmp regexp_compare /\ eq_cmp regexp_compare`
856        by metis_tac [eq_cmp_regexp_compare,regexp_compare_good]
857  >> rw_tac set_ss []
858  >- metis_tac [insert_thm]
859  >- (fs [fmap_inj_def,lookup_insert_thm]
860       >> rw [regexp_compare_eq,fdom_insert]
861       >> metis_tac [DECIDE ``x < (x:num) ==> F``,eq_cmp_def])
862  >- (rw [lookup_insert_thm,regexp_compare_eq,EQ_IMP_THM]
863       >- (pop_assum mp_tac >> rw_tac set_ss []
864            >> metis_tac [DECIDE ``x < x + 1n``,LESS_TRANS])
865       >- (`x < next_state \/ (x=next_state)` by DECIDE_TAC
866             >> metis_tac [NOT_SOME_NONE]))
867  >- rw_tac set_ss []
868  >- metis_tac [LESS_TRANS,DECIDE ``x < x+1n``,IN_DEF]
869  >- (rw_tac set_ss [] >> metis_tac[])
870  >- metis_tac [LESS_TRANS,DECIDE ``x < x+1n``,IN_DEF])
871
872val submap_def =
873 Define
874   `submap cmp t1 t2 =
875         !x. x IN fdom cmp t1 ==> x IN fdom cmp t2 /\ (lookup cmp x t1 = lookup cmp x t2)`;
876
877val submap_id = Q.prove
878(`!t cmp. submap cmp t t`,
879 rw [submap_def]);
880
881val submap_trans = Q.prove
882(`!t1 t2 t3 cmp. submap cmp t1 t2 /\ submap cmp t2 t3 ==> submap cmp t1 t3`,
883 rw [submap_def]);
884
885val submap_mono = Q.prove
886(`!t1 t2 k v cmp.
887    eq_cmp cmp /\ invariant cmp t2 /\ submap cmp t1 t2
888    /\ k NOTIN fdom cmp t1
889   ==>
890   submap cmp t1 (insert cmp k v t2)`,
891  rw [submap_def,fdom_insert]
892  >> res_tac
893  >> `good_cmp cmp` by metis_tac [eq_cmp_def]
894  >> rw [lookup_insert_thm]
895  >> `k=x` by metis_tac [eq_cmp_def]
896  >> rw []
897  >> metis_tac[]);
898
899val submap_insert = Q.prove
900(`!bmap t cmp x v.
901    eq_cmp cmp /\ invariant cmp bmap /\
902    x NOTIN fdom cmp bmap /\
903    submap cmp (insert cmp x v bmap) t
904    ==> submap cmp bmap t`,
905 rw_tac set_ss [submap_def]
906  >> `invariant cmp (insert cmp x v bmap)` by metis_tac [insert_thm,eq_cmp_def]
907  >- (qpat_x_assum `$! M` (mp_tac o Q.ID_SPEC) >> rw_tac set_ss [fdom_insert])
908  >- (`~(x' = x)` by (fs [fdom_def] >> metis_tac[])
909       >> `fdom cmp (insert cmp x v bmap) x'` by rw [fdom_insert,IN_DEF]
910       >> `good_cmp cmp` by metis_tac [eq_cmp_def]
911       >> `lookup cmp x' (insert cmp x v bmap) = lookup cmp x' t` by metis_tac[]
912       >> pop_assum (SUBST_ALL_TAC o SYM)
913       >> pat_elim `$!M`
914       >> rw_tac set_ss [lookup_insert_thm]
915       >> metis_tac [eq_cmp_def]));
916
917val set_lem = Q.prove
918(`!x s t. x IN s ==> (s UNION t = s UNION (x INSERT t))`,
919 rw_tac set_ss [SET_EQ_THM] >> metis_tac []);
920
921val fapply_def =
922 Define
923  `fapply cmp bmap x = THE (lookup cmp x bmap)`;
924
925val extend_states_thm = Q.prove
926(`!next_state state_map table states next_state' state_map' table'.
927  (extend_states next_state state_map table states = (next_state',state_map',table'))
928  /\ invariant regexp_compare state_map
929   ���
930  (fdom regexp_compare state_map' = (fdom regexp_compare state_map UNION set (MAP SND states))) ���
931  submap regexp_compare state_map state_map' ���
932  next_state ��� next_state' ���
933  (table' = REVERSE (MAP (\(c,r). (c, fapply regexp_compare state_map' r)) states) ++ table) /\
934  invariant regexp_compare state_map'`
935 ,
936 `eq_cmp regexp_compare` by metis_tac [eq_cmp_regexp_compare]
937  >> ho_match_mp_tac extend_states_ind
938  >> rw [extend_states_def]
939  >> BasicProvers.EVERY_CASE_TAC
940  >> `invariant regexp_compare (insert regexp_compare r' next_state state_map)`
941       by metis_tac [insert_thm,eq_cmp_def]
942  >> fs [submap_id]
943  >> res_tac
944  >- (rw_tac set_ss [fdom_insert] >> metis_tac[])
945  >- (`r' IN fdom regexp_compare state_map` by rw [fdom_def]
946       >> rw_tac set_ss [set_lem])
947  >- (match_mp_tac submap_insert
948      >> rw [fdom_def]
949      >> metis_tac [NOT_SOME_NONE])
950  >- decide_tac
951  >- (pat_elim `$! M` >> rw[]
952       >> qpat_x_assum `submap _ __ ___` mp_tac
953      >> rw_tac set_ss [submap_def,fapply_def]
954      >> pop_assum (mp_tac o Q.SPEC `r'`)
955      >> rw [fdom_insert]
956      >> pop_assum mp_tac
957      >> `good_cmp regexp_compare` by metis_tac [regexp_compare_good]
958      >> rw [lookup_insert_thm,regexp_compare_eq]
959      >> metis_tac [THE_DEF])
960  >- (pat_elim `$! M` >> rw[]
961       >> qpat_x_assum `submap _ __ ___` mp_tac
962      >> rw_tac set_ss [submap_def,fapply_def]
963      >> pop_assum (mp_tac o Q.SPEC `r'`)
964      >> rw [fdom_def]
965      >> metis_tac [THE_DEF])
966);
967
968val good_table_def =
969 Define
970  `good_table state_map table =
971    (ALL_DISTINCT (MAP FST table) ���
972    (!n table2.
973       (ALOOKUP table n = SOME table2)
974       ���
975       (set (MAP FST table2) = set ALPHABET)) ���
976    (!n c n' table2.
977       (ALOOKUP table n = SOME table2) ���
978       (ALOOKUP table2 c = SOME n')
979       ���
980       MEM c ALPHABET ���
981       ?r r'.
982         (lookup regexp_compare r state_map = SOME n) ���
983         (lookup regexp_compare r' state_map = SOME n') ���
984         (r' = smart_deriv c r)))`;
985
986val invar_def = Define `invar = invariant regexp_compare`;
987val dom_def   = Define `dom = fdom regexp_compare`;
988val range_def = Define `range = frange regexp_compare`;
989val union_def = Define `union = ounion regexp_compare`;
990val set_def   = Define `set = oset regexp_compare`;
991val image_def = Define `image = oimage regexp_compare`;
992val apply_def = Define `apply = fapply regexp_compare`;
993
994val Brz_invariant_def =
995 Define
996  `Brz_invariant seen todo (next_state,state_map,table) ���
997    invar state_map /\ invar seen /\
998    EVERY is_normalized todo ���
999    (!r. mem_regexp r seen ��� is_normalized r) ���
1000    (!r. r ��� dom state_map ��� is_normalized r) ���
1001    (dom (union seen (set todo)) = dom state_map) ���
1002    (range state_map = count next_state) ���
1003    (set (MAP FST table) = fdom num_cmp (oimage num_cmp (apply state_map) seen)) ���
1004    fmap_inj regexp_compare state_map ���
1005    good_table state_map table`;
1006
1007val lem = Q.prove (
1008`!b c d e f. Brz_invariant b c (d,e,f) ��� EVERY is_normalized c`,
1009 rw [Brz_invariant_def]);
1010
1011val fdom_ounion = Q.prove
1012(`!a b. good_cmp cmp /\ invariant cmp a /\ invariant cmp b
1013     ==> (fdom cmp (ounion cmp a b) = (fdom cmp a) UNION (fdom cmp b))`,
1014 rw[fdom_def,ounion_def,SET_EQ_THM]
1015  >> metis_tac [regexp_compare_good,good_oset_def,
1016       SIMP_RULE std_ss [oin_def,ounion_def,
1017                         member_iff_lookup,oneTheory.one] oin_ounion]);
1018
1019val dom_union = Q.prove
1020(`!a b. invariant regexp_compare a /\ invariant regexp_compare b
1021        ==> (dom (union a b) = (dom a) UNION (dom b))`,
1022 metis_tac [regexp_compare_good,fdom_ounion,dom_def,union_def])
1023
1024val invariant_oset = Q.store_thm
1025("invariant_oset",
1026 `!l. good_cmp cmp ==> invariant cmp (oset cmp l)`,
1027 simp_tac std_ss [oset_def]
1028   >> Induct
1029   >> fs [oempty_def,empty_def,oinsert_def]
1030   >- metis_tac [invariant_def]
1031   >- metis_tac [insert_thm]);
1032
1033val in_dom_oset = Q.store_thm
1034("in_dom_oset",
1035 `!l x. eq_cmp cmp ==> (x IN fdom cmp (oset cmp l) <=> MEM x l)`,
1036 Induct >> rw []
1037  >- rw [oempty_def,empty_def,fdom_def,lookup_def]
1038  >- (`good_cmp cmp` by metis_tac [eq_cmp_def]
1039       >> imp_res_tac invariant_oset >> pop_assum (assume_tac o Q.SPEC `l`)
1040       >> rw [oset_def]
1041       >> rw [GSYM oset_def]
1042       >> rw [oinsert_def]
1043       >> rw_tac set_ss [fdom_insert]));
1044
1045val dom_set_append = Q.store_thm
1046("dom_set_append",
1047 `!l1 l2. dom (set (l1++l2)) = dom (set l1) UNION dom (set l2)`,
1048 rw[dom_def,set_def,EXTENSION] >>
1049 `eq_cmp regexp_compare` by metis_tac [eq_cmp_regexp_compare] >>
1050 rw [in_dom_oset]);
1051
1052val member_insert = Q.store_thm
1053("member_insert",
1054 `!bmap x y v.
1055    eq_cmp cmp /\ invariant cmp bmap ==>
1056    (member cmp x (insert cmp y v bmap) <=> (x=y) \/ member cmp x bmap)`,
1057 rw [member_iff_lookup,GSYM (SIMP_RULE set_ss [SET_EQ_THM] fdom_def)] >>
1058 rw [fdom_insert] >> metis_tac [IN_DEF]);
1059
1060val triv_lem = Q.prove(`!s1 s2. (s1 = s2) ==> ((s1 UNION s) = (s2 UNION s))`,rw[]);
1061
1062val dom_oset_lem = Q.prove
1063(`!l. eq_cmp cmp ==> (fdom cmp (oset cmp l) = LIST_TO_SET l)`,
1064 rw [EXTENSION,in_dom_oset,eq_cmp_regexp_compare]);
1065
1066val mem_foldrWithKey = Q.prove
1067(`!(bset:'a oset) acc a. eq_cmp cmp /\ invariant cmp bset ==>
1068     (MEM (a,()) (foldrWithKey (\k x xs. (k,())::xs) acc bset)
1069            <=>
1070      (a IN fdom cmp bset) \/ MEM (a,()) acc)`,
1071simp_tac set_ss [fdom_def]
1072 >> Induct >> rw [foldrWithKey_def,invariant_def] >> fs []
1073 >- rw [lookup_def]
1074 >- (`good_cmp cmp` by metis_tac [eq_cmp_def]
1075      >> rw [lookup_bin,EQ_IMP_THM]
1076      >> metis_tac [key_greater_lookup,key_less_lookup,good_cmp_thm,eq_cmp_def]));
1077
1078val mem_foldrWithKey_lem =
1079    mem_foldrWithKey
1080      |> Q.SPEC `bset`
1081      |> Q.SPEC `[]`
1082      |> SIMP_RULE list_ss []
1083
1084val left_to_right = Q.prove
1085(`eq_cmp cmp ==>
1086   !list bmap k v. invariant cmp bmap /\
1087        (lookup cmp k (FOLDR (��(k,v) t. insert cmp k v t) bmap list) = SOME v)
1088            ==>
1089           (MEM (k,v) list \/ (lookup cmp k bmap = SOME v))`,
1090 strip_tac
1091  >> `good_cmp cmp` by metis_tac [eq_cmp_def]
1092  >> Induct
1093     >- rw []
1094     >- (Cases_on `h` >> rw []
1095          >> pop_assum mp_tac
1096          >> `invariant cmp (FOLDR (��(k,v) t. insert cmp k v t) bmap list)`
1097               by (Q.SPEC_TAC (`list`,`L`)
1098                     >> Induct >> rw [invariant_def]
1099                     >> Cases_on `h` >> rw [] >> metis_tac [insert_thm])
1100          >> rw [lookup_insert_thm]
1101          >> metis_tac [eq_cmp_def]));
1102
1103val invariant_ffoldr = Q.prove
1104(`!list aset f.
1105   good_cmp cmp /\ invariant cmp aset
1106   ==>
1107   invariant cmp (FOLDR (��(k,v) t. insert cmp (f k) v t) aset list)`,
1108  Induct >> rw [invariant_def]
1109         >> Cases_on `h` >> rw [] >> metis_tac [insert_thm]);
1110
1111val invariant_ffoldr_inst =
1112    invariant_ffoldr
1113      |> INST_TYPE [beta |-> ``:unit``, gamma |-> beta]
1114
1115val left_to_right_alt = Q.prove
1116(`eq_cmp (cmp:'b->'b->ordering)
1117   ==>
1118   !(list :('a # unit) list) (bset :'b oset) x f.
1119      invariant cmp bset /\
1120      (lookup cmp x (FOLDR (��(k,v:unit) t. insert cmp (f k) () t) bset list) = SOME ())
1121            ==>
1122           (?a. MEM (a,()) list /\ (x = f a))
1123           \/
1124           ((!a. MEM (a,()) list ==> (x <> f a)) /\ (lookup cmp x bset = SOME ()))`,
1125 strip_tac
1126  >> `good_cmp cmp` by metis_tac [eq_cmp_def]
1127  >> Induct
1128     >- rw []
1129     >- (Cases_on `h` >> rw [] >> fs []
1130          >> pop_assum mp_tac
1131          >> `invariant cmp (FOLDR (��(k,v:unit) t. insert cmp (f k) () t) bset list)`
1132               by (Q.SPEC_TAC (`list`,`L`)
1133                     >> Induct >> rw [invariant_def]
1134                     >> Cases_on `h` >> rw [] >> metis_tac [insert_thm])
1135          >> rw [lookup_insert_thm]
1136          >> metis_tac [eq_cmp_def]));
1137
1138val left_to_right_lem =
1139    left_to_right_alt
1140      |> Q.GEN `cmp`
1141      |> Q.SPEC `cmp2`
1142      |> UNDISCH
1143      |> Q.SPEC `foldrWithKey (��(k:'a) (x:unit) xs. (k,())::xs) [] aset`
1144      |> Q.SPEC `Tip`
1145      |> Q.SPEC `x`
1146      |> Q.SPEC `f`
1147      |> DISCH_ALL;
1148
1149val oin_fdom = Q.prove
1150(`!aset x. oin cmp x aset <=> x IN fdom cmp aset`,
1151 rw [fdom_def, oin_def, member_iff_lookup]);
1152
1153val mem_oin = Q.prove
1154(`!list x aset.
1155     eq_cmp cmp /\ invariant cmp aset /\
1156     MEM (x,()) list
1157      ==>
1158     oin cmp (f x) (FOLDR (\(k,u) s. insert cmp (f k) () s) aset list)`,
1159 Induct >> rw [oin_fdom]
1160  >> `good_cmp cmp` by metis_tac [eq_cmp_def]
1161  >> mp_tac (SPEC_ALL invariant_ffoldr_inst)
1162  >| [all_tac, Cases_on `h`]
1163  >> rw[fdom_insert]
1164  >> metis_tac [oin_fdom,IN_DEF]);
1165
1166val mem_oin_inst =
1167   mem_oin
1168   |> SPEC_ALL
1169   |> Q.INST [`cmp` |-> `cmp2`, `aset` |-> `Tip`, `x` |-> `x'`]
1170   |> Q.GEN `list`
1171
1172val fdom_oimage = Q.store_thm
1173("fdom_image",
1174 `!aset:'a oset.
1175     eq_cmp (cmp1:'a->'a->ordering) /\
1176     eq_cmp (cmp2:'b->'b->ordering) /\ invariant cmp1 aset
1177    ==> (fdom cmp2 (oimage cmp2 f aset) = {f x | oin cmp1 x aset})`,
1178simp_tac set_ss
1179   [oimage_def,map_keys_def,balanced_mapTheory.fromList_def,fdom_def,
1180    toAscList_def,empty_def,
1181    rich_listTheory.FOLDR_MAP,LAMBDA_PROD,oneTheory.one]
1182 >> rw [EQ_IMP_THM]
1183    >- (mp_tac left_to_right_lem >> rw[invariant_def]
1184        >- (qexists_tac `a` >> rw[]
1185             >> pat_elim `lookup _ __ ___ = SOME ()`
1186             >> pat_elim `eq_cmp cmp2`
1187             >> `a IN fdom cmp1 aset` by metis_tac [mem_foldrWithKey_lem]
1188             >> pop_assum mp_tac
1189             >> rw[oin_def,member_iff_lookup,fdom_def])
1190        >- fs [lookup_def])
1191    >- (`MEM (x',()) (foldrWithKey (��k x xs. (k,())::xs) [] aset)`
1192          by metis_tac [oin_fdom,IN_DEF, mem_foldrWithKey_lem]
1193         >> mp_tac mem_oin_inst >> rw [invariant_def]
1194         >> res_tac
1195         >> fs [oin_def,member_iff_lookup,oneTheory.one])
1196);
1197
1198val fdom_oimage_rw =
1199 fdom_oimage
1200   |> SIMP_RULE set_ss [GSPECIFICATION_applied];
1201
1202val fdom_oimage_insert = Q.prove
1203(`!bset a f cmp1 cmp2.
1204    eq_cmp (cmp1:'a->'a->ordering) /\
1205     eq_cmp (cmp2:'b->'b->ordering) /\ invariant cmp1 bset
1206         ==> (fdom cmp2 (oimage cmp2 f (oinsert cmp1 a bset))
1207               =
1208              ((f a) INSERT fdom cmp2 (oimage cmp2 f bset)))`,
1209rpt strip_tac
1210 >> `invariant cmp1 (oinsert cmp1 a bset)` by metis_tac [insert_thm, eq_cmp_def,oinsert_def]
1211 >> mp_tac fdom_oimage_rw >> asm_simp_tac bool_ss[]
1212 >> rw_tac set_ss []
1213 >> `good_cmp cmp1 /\ good_cmp cmp2` by metis_tac [eq_cmp_def]
1214 >> rw [oin_def, member_iff_lookup, oinsert_def]
1215 >> fs [eq_cmp_def,lookup_insert_thm]
1216 >> metis_tac[]);
1217
1218
1219val fdom_oimage_inst =
1220   fdom_oimage
1221   |> INST_TYPE [alpha |-> ``:regexp``, beta |-> ``:num``]
1222   |> Q.INST [`cmp1` |-> `regexp_compare`, `cmp2` |-> `num_cmp`]
1223   |> SIMP_RULE std_ss [eq_cmp_regexp_compare,num_cmp_good,num_cmp_antisym,eq_cmp_def]
1224
1225val Brz_inv_pres = Q.prove (
1226`!todo1 todos seen next_state state_map table.
1227  Brz_invariant seen (todo1::todos) (next_state,state_map,table) ���
1228  ~mem_regexp todo1 seen
1229 ���
1230  Brz_invariant (insert_regexp todo1 seen)
1231                (MAP SND (transitions todo1) ++ todos)
1232                (build_table (transitions todo1)
1233                             todo1 (next_state,state_map,table))`,
1234 rw_tac list_ss [good_table_def,insert_regexp_def, invar_def,
1235                 Brz_invariant_def, build_table_def, transitions_def,set_def] >>
1236 imp_res_tac extend_states_thm >>
1237 `todo1 IN dom state_map`
1238   by (qpat_x_assum `dom (union a b) = dom state_map` (mp_tac o SYM) >> rw [] >>
1239       `invariant regexp_compare
1240         (oset regexp_compare (todo1::todos))` by metis_tac [invariant_oset,regexp_compare_good]
1241       >> rw [dom_union]
1242       >> disj2_tac
1243       >> `eq_cmp regexp_compare` by metis_tac [eq_cmp_regexp_compare]
1244       >> rw [dom_def,in_dom_oset])
1245   >>
1246 `todo1 IN dom state_map'` by metis_tac [submap_def,dom_def]
1247 >>
1248 pop_assum (strip_assume_tac o SIMP_RULE set_ss [dom_def, fdom_def]) >> rw [] >>
1249  rw [Brz_invariant_def, good_table_def,invar_def]
1250  >- metis_tac [insert_thm,regexp_compare_good]
1251  >- (rw [EVERY_MAP] >> Q.SPEC_TAC (`ALPHABET`,`alphabet`)
1252       >> Induct >> rw [smart_deriv_normalized])
1253  >- metis_tac [member_insert,mem_regexp_def,eq_cmp_regexp_compare,insert_thm,eq_cmp_def]
1254  >- (`r IN fdom regexp_compare state_map \/
1255       r IN set (MAP SND (MAP (��c. (c,smart_deriv c todo1)) ALPHABET))`
1256       by metis_tac [EXTENSION,dom_def,IN_UNION]
1257       >- metis_tac [dom_def]
1258       >- (fs [MAP_MAP_o,MEM_MAP] >> metis_tac [smart_deriv_normalized, dom_def]))
1259  >- (fs [GSYM dom_def] >> qpat_x_assum `dom _ = dom __` (assume_tac o SYM)
1260       >> rw [GSYM set_def]
1261       >> `invariant regexp_compare (insert regexp_compare todo1 () seen)`
1262            by metis_tac [insert_thm,regexp_compare_good]
1263       >> `invariant regexp_compare
1264             (set (MAP SND (MAP (��c. (c,smart_deriv c todo1)) ALPHABET) ++ todos))`
1265           by metis_tac [invariant_oset, set_def,regexp_compare_good]
1266       >> `invariant regexp_compare (set (todo1::todos))`
1267          by metis_tac [invariant_oset, set_def, regexp_compare_good]
1268       >> rw [dom_union]
1269       >> rw_tac bool_ss [Once CONS_APPEND,dom_set_append]
1270       >> `dom (insert regexp_compare todo1 () seen) =
1271           dom (set [todo1]) UNION dom seen`
1272            by (rw [dom_def,fdom_insert,eq_cmp_regexp_compare,Once INSERT_SING_UNION]
1273                >> match_mp_tac triv_lem
1274                >> rw_tac bool_ss [EXTENSION,set_def]
1275               >> rw [in_dom_oset,eq_cmp_regexp_compare])
1276       >> rw []
1277       >> rw [AC UNION_COMM UNION_ASSOC]
1278       >> metis_tac [dom_oset_lem,eq_cmp_regexp_compare,dom_def,set_def])
1279  >- (`extend_states_inv next_state state_map ([]:(num,num) alist)`
1280               by (rw [extend_states_inv_def] >> metis_tac [range_def])
1281       >> imp_res_tac extend_states_inv
1282       >> fs [extend_states_inv_def, count_def, EXTENSION, range_def])
1283  >- (`todo1 IN dom state_map'` by (fs[submap_def] >> metis_tac[dom_def])
1284      >> `eq_cmp num_cmp /\ eq_cmp regexp_compare`
1285            by metis_tac[eq_cmp_regexp_compare,num_cmp_good,num_cmp_antisym,eq_cmp_def]
1286      >> rw [GSYM oinsert_def,fdom_oimage_insert]
1287      >> `apply state_map' todo1 = x` by rw [apply_def,fapply_def]
1288      >> pop_assum SUBST_ALL_TAC
1289      >> AP_TERM_TAC
1290      >> `dom seen SUBSET dom state_map` by
1291           (qpat_x_assum `dom _ = dom state_map` (SUBST_ALL_TAC o SYM)
1292             >> metis_tac [invariant_oset, eq_cmp_def,dom_union,SUBSET_UNION])
1293      >> rw[fdom_oimage_inst,SET_EQ_THM,EQ_IMP_THM,oin_fdom]
1294      >> Q.EXISTS_TAC `x''` >> rw[]
1295      >> `x'' IN dom state_map` by metis_tac [SUBSET_DEF,dom_def]
1296      >> `(x'' IN dom state_map') /\
1297          (lookup regexp_compare x'' state_map = lookup regexp_compare x'' state_map')`
1298           by metis_tac [submap_def,dom_def]
1299      >> rw [apply_def,fapply_def])
1300  >- (`extend_states_inv next_state state_map ([]:(num,num) alist)`
1301               by (rw [extend_states_inv_def] >> metis_tac [range_def])
1302       >> imp_res_tac extend_states_inv
1303       >> pop_assum mp_tac
1304       >> rw [extend_states_inv_def])
1305  >- (rw_tac set_ss [fdom_oimage_inst,GSYM IMP_DISJ_THM,oneTheory.one,
1306                     oin_fdom,fdom_def,apply_def,fapply_def]
1307       >> strip_tac
1308       >> `fmap_inj regexp_compare state_map'`
1309           by (`extend_states_inv next_state state_map ([]:(num,num) alist)`
1310                 by (rw [extend_states_inv_def] >> metis_tac [range_def])
1311               >> imp_res_tac extend_states_inv
1312               >> pop_assum mp_tac
1313               >> rw [extend_states_inv_def])
1314       >> `dom seen SUBSET dom state_map` by
1315           (`eq_cmp num_cmp /\ eq_cmp regexp_compare`
1316                by metis_tac[eq_cmp_regexp_compare,
1317                             num_cmp_good,num_cmp_antisym,eq_cmp_def]
1318             >> qpat_x_assum `dom _ = dom state_map` (SUBST_ALL_TAC o SYM)
1319             >> metis_tac [invariant_oset, eq_cmp_def,dom_union,SUBSET_UNION])
1320       >> pop_assum mp_tac
1321       >> rw_tac set_ss [dom_def, fdom_def, member_iff_lookup,SUBSET_DEF,oneTheory.one]
1322       >> Q.EXISTS_TAC `x'` >> rw[]
1323       >> strip_tac
1324       >> `x' IN fdom regexp_compare state_map` by fs [SUBSET_DEF,dom_def, fdom_def]
1325       >> `(x' IN fdom regexp_compare state_map') /\
1326           (lookup regexp_compare x' state_map = lookup regexp_compare x' state_map')`
1327             by (fs [submap_def,fdom_def, member_iff_lookup] >> metis_tac [])
1328       >> fs []
1329       >> `x' <> todo1` by (fs[mem_regexp_def,member_iff_lookup] >> metis_tac[])
1330       >> metis_tac [fmap_inj_def,eq_cmp_regexp_compare, eq_cmp_def])
1331  >- (pop_assum mp_tac
1332        >> rw[MAP_MAP_o,combinTheory.o_DEF]
1333        >> fs[MAP_REVERSE,MAP_MAP_o,combinTheory.o_DEF]
1334        >> metis_tac[])
1335  >- (qpat_x_assum `(if _ then __ else ___) = ____` mp_tac
1336        >> rw[MAP_MAP_o,combinTheory.o_DEF]
1337             >- (imp_res_tac alistTheory.ALOOKUP_MEM
1338                  >> fs [MEM_REVERSE,MEM_MAP])
1339             >- metis_tac[])
1340  >- (qpat_x_assum `(if _ then __ else ___) = ____` mp_tac
1341        >> rw[MAP_MAP_o,combinTheory.o_DEF]
1342           >- (qexists_tac `todo1` >> rw[]
1343                  >> imp_res_tac alistTheory.ALOOKUP_MEM
1344                  >> pop_assum mp_tac
1345                  >> rw_tac set_ss [MEM_REVERSE,MEM_MAP,fapply_def]
1346                  >> `smart_deriv c todo1 IN fdom regexp_compare state_map'`
1347                      by (rw [MAP_MAP_o,combinTheory.o_DEF,MEM_MAP]
1348                           >> disj2_tac
1349                           >> qexists_tac `c` >> rw[]
1350                           >> metis_tac [IN_DEF])
1351                  >> fs [fdom_def, member_iff_lookup])
1352           >- (fs[submap_def, fdom_def, member_iff_lookup] >> metis_tac[]))
1353);
1354
1355
1356val Brz_invariant_alt = Q.prove
1357(`!seen worklist worklist' acc.
1358   (LIST_TO_SET worklist = LIST_TO_SET worklist')
1359   ==>
1360   (Brz_invariant seen worklist acc = Brz_invariant seen worklist' acc)`,
1361rpt strip_tac >> PairCases_on `acc`
1362 >> rw[Brz_invariant_def,EQ_IMP_THM]
1363 >> ((qpat_x_assum `EVERY _ __` mp_tac >>
1364        qpat_x_assum `LIST_TO_SET _ = LIST_TO_SET __` mp_tac
1365         >> rw [EVERY_MEM]
1366         >> NO_TAC)
1367    ORELSE
1368     (qpat_x_assum `dom _ = dom __` (SUBST_ALL_TAC o SYM)
1369         >> fs [invar_def]
1370         >> `invariant regexp_compare (set worklist') /\
1371             invariant regexp_compare (set worklist)`
1372              by metis_tac [invariant_oset,set_def, regexp_compare_good]
1373          >> rw [dom_union,EXTENSION]
1374          >> `eq_cmp regexp_compare` by metis_tac [eq_cmp_regexp_compare]
1375          >> rw [in_dom_oset,dom_def,set_def])));
1376
1377val Brz_inv_thm = Q.prove
1378(`!seen worklist acc.
1379   dom_Brz seen worklist acc
1380   ==>
1381    !seen' acc'.
1382      Brz_invariant seen worklist acc ���
1383      (Brzozowski seen worklist acc = (seen',acc'))
1384      ==>
1385       Brz_invariant seen' [] acc'`,
1386 ho_match_mp_tac Brzozowski_ind
1387   >> rw []
1388   >> qpat_x_assum `Brzozowski _ __ ___ = _4` mp_tac
1389   >> rw [Brzozowski_eqns]
1390   >> pop_assum mp_tac
1391   >> CASE_TAC
1392   >> rw[]
1393      >- metis_tac[]
1394      >- (rfs []
1395          >> first_assum match_mp_tac
1396          >> qpat_x_assum `Brz_invariant seen (h::t) args` mp_tac
1397          >> PairCases_on `acc`
1398          >> rw [Brz_invariant_def]
1399          >> qpat_x_assum `dom _ = dom __` (SUBST_ALL_TAC o SYM)
1400          >> fs [invar_def]
1401          >> `invariant regexp_compare (set t) /\
1402              invariant regexp_compare (set (h::t))`
1403              by metis_tac [invariant_oset,set_def, regexp_compare_good]
1404          >> rw [dom_union,EXTENSION]
1405          >> `eq_cmp regexp_compare` by metis_tac [eq_cmp_regexp_compare]
1406          >> rw [in_dom_oset,dom_def,set_def]
1407          >> fs [mem_regexp_def, member_iff_lookup]
1408          >> rw [fdom_def]
1409          >> metis_tac[])
1410      >- (rfs [LET_THM]
1411          >> rw[]
1412          >> pat_elim `_ = __`
1413          >> PairCases_on `acc`
1414          >> imp_res_tac Brz_inv_pres
1415          >> `LIST_TO_SET (remove_dups (MAP SND (transitions h)) ++ t) =
1416              LIST_TO_SET (MAP SND (transitions h) ++ t)`
1417                    by rw [EXTENSION,remove_dups_mem]
1418          >> metis_tac [Brz_invariant_alt]))
1419
1420val Brz_mono = Q.prove
1421(`!seen worklist acc.
1422  dom_Brz seen worklist acc
1423 ==>
1424 !seen' acc'.
1425    Brz_invariant seen worklist acc /\
1426    (Brzozowski seen worklist acc = (seen',acc'))
1427  ==>
1428    submap regexp_compare (FST (SND acc)) (FST(SND acc'))`,
1429 ho_match_mp_tac Brzozowski_ind
1430 >> Cases_on `worklist`
1431    >- (rw []
1432         >> qpat_x_assum `Brzozowski _ __ ___ = _4` mp_tac
1433         >> rw [Brzozowski_eqns,submap_id])
1434    >- (rw []
1435         >> qpat_x_assum `Brzozowski _ __ ___ = _4` mp_tac
1436         >> rw [Brzozowski_eqns]
1437         >> fs[LET_THM]
1438            >- (first_assum match_mp_tac
1439                >> qpat_x_assum `Brz_invariant seen (h::t) acc` mp_tac
1440                >> PairCases_on `acc`
1441                >> rw [Brz_invariant_def]
1442                >> qpat_x_assum `dom _ = dom __` (SUBST_ALL_TAC o SYM)
1443                >> fs [invar_def]
1444                >> `invariant regexp_compare (set t) /\
1445                    invariant regexp_compare (set (h::t))`
1446                      by metis_tac [invariant_oset,set_def, regexp_compare_good]
1447                >> rw [dom_union,EXTENSION]
1448                >> `eq_cmp regexp_compare` by metis_tac [eq_cmp_regexp_compare]
1449                >> rw [in_dom_oset,dom_def,set_def]
1450                >> fs [mem_regexp_def, member_iff_lookup]
1451                >> rw [fdom_def]
1452                >> metis_tac[])
1453            >- (rw[]
1454                >> pat_elim `_ = __`
1455                >> PairCases_on `acc`
1456                >> imp_res_tac Brz_inv_pres
1457                >> `LIST_TO_SET (remove_dups (MAP SND (transitions h)) ++ t) =
1458                    LIST_TO_SET (MAP SND (transitions h) ++ t)`
1459                        by rw [EXTENSION,remove_dups_mem]
1460                >> imp_res_tac Brz_invariant_alt
1461                >> fs[]
1462                >> NTAC 3 (pop_assum (K ALL_TAC))
1463                >> match_mp_tac submap_trans
1464                >> qexists_tac `FST (SND (build_table (transitions h) h (acc0,acc1,acc2)))`
1465                >> rw[build_table_def,LET_THM]
1466                >> `?x state_map'' z. extend_states acc0 acc1 [] (transitions h) = (x,state_map'',z)`
1467                       by metis_tac [pair_CASES]
1468                >> fs[]
1469                >> `invariant regexp_compare acc1`
1470                     by metis_tac [Brz_invariant_def, invar_def]
1471                >> imp_res_tac extend_states_thm
1472                >> rw []
1473                >> CASE_TAC
1474                >> fs []
1475                >> match_mp_tac submap_mono
1476                >> rw[eq_cmp_regexp_compare]
1477                >> qpat_x_assum `submap _ __ ___` mp_tac
1478                >> rw [submap_def]
1479                >> strip_tac
1480                >> res_tac
1481                >> fs [fdom_def]
1482                >> metis_tac [NOT_SOME_NONE])));
1483
1484(*---------------------------------------------------------------------------*)
1485(* table_lang                                                                *)
1486(*---------------------------------------------------------------------------*)
1487
1488val table_lang_def =
1489 Define
1490  `(table_lang final_states table n [] = MEM n final_states) /\
1491   (table_lang final_states table n (c::t) =
1492     case ALOOKUP table n of
1493       | NONE => F
1494       | SOME table2 =>
1495           case ALOOKUP table2 c of
1496             | NONE => F
1497             | SOME n' => table_lang final_states table n' t)`;
1498
1499val table_lang_correct = Q.prove
1500(`!finals table state_map.
1501  fmap_inj regexp_compare state_map ���
1502  good_table state_map table ���
1503  (set (MAP FST table) = frange regexp_compare state_map) ���
1504  (!n r. (lookup regexp_compare r state_map = SOME n) ��� (MEM n finals ��� nullable r))
1505  ���
1506  !n r s.
1507    (!c. MEM c s ��� MEM c ALPHABET) ���
1508    (lookup regexp_compare r state_map = SOME n)
1509    ���
1510    (table_lang finals table n s ��� smart_deriv_matches r (MAP CHR s))`,
1511 rpt gen_tac >>
1512 strip_tac >>
1513 Induct_on `s` >>
1514 rw [table_lang_def, smart_deriv_matches_def] >>
1515 fs [good_table_def] >>
1516 BasicProvers.EVERY_CASE_TAC >>
1517 rw []
1518 >- (imp_res_tac alistTheory.ALOOKUP_NONE
1519     >> fs [EXTENSION]
1520     >> `n NOTIN frange regexp_compare state_map` by metis_tac[]
1521     >> fs [frange_def])
1522 >- metis_tac [alistTheory.ALOOKUP_NONE]
1523 >- (`���r'.(lookup regexp_compare r' state_map = SOME n) /\
1524          (lookup regexp_compare (smart_deriv h r') state_map =
1525           SOME x')` by metis_tac []
1526     >> `table_lang finals table x' s <=>
1527         smart_deriv_matches (smart_deriv h r') (MAP CHR s)`
1528         by metis_tac[]
1529     >> pop_assum SUBST_ALL_TAC
1530     >> `r' = r`
1531         by (qpat_x_assum `fmap_inj _ __` mp_tac
1532              >> rw_tac set_ss [fmap_inj_def,fdom_def,regexp_compare_eq])
1533     >> rw []
1534     >> metis_tac [ORD_CHR_lem,mem_alphabet]))
1535
1536val vec_lang_lem1 = Q.prove
1537(`���(n:num). MEM n finals_list ���
1538           (ALOOKUP (MAP (��x. (x,T)) finals_list) n = SOME T)`,
1539 rw [EQ_IMP_THM]
1540  >- rw [alistTheory.ALOOKUP_TABULATE]
1541  >- (`MEM (n,T) (MAP (��x. (x,T)) finals_list)` by METIS_TAC [alistTheory.ALOOKUP_MEM]
1542      >> fs[MEM_MAP]));
1543
1544val vec_lang_lem2 =
1545  alistTheory.alookup_stable_sorted
1546     |> Q.INST_TYPE [`:'a` |-> `:num`, `:'b` |-> `:(num, 'a) alist`]
1547     |> Q.SPECL [`$<=`, `mergesort`, `n`, `table`]
1548     |> SIMP_RULE (srw_ss()++ARITH_ss) [transitive_def, total_def, mergesort_STABLE_SORT];
1549
1550(* Following are a bit suss because of use of ORD *)
1551val vec_lang_lem3 =
1552  alistTheory.alookup_stable_sorted
1553     |> Q.INST_TYPE [`:'a` |-> `:num`, `:'b` |-> `:'a option`]
1554     |> Q.SPECL [`$<=`, `mergesort`, `n`, `MAP (��(c,n). (c,SOME n)) table2`]
1555     |> SIMP_RULE (srw_ss()++ARITH_ss)
1556           [transitive_def, total_def, mergesort_STABLE_SORT, stringTheory.char_le_def];
1557
1558val vec_lang_lem4 = Q.prove (
1559`!l x. ALOOKUP (MAP (��(c,n). (c,SOME n)) l) x =
1560       OPTION_MAP SOME (ALOOKUP l x)`,
1561 Induct_on `l` >>
1562 rw [] >>
1563 PairCases_on `h` >>
1564 rw [] >>
1565 fs [stringTheory.ORD_11]);
1566
1567val leq_thm =
1568 let val leq = numSyntax.leq_tm
1569 in Q.prove (`transitive ^leq ��� total ^leq ��� antisymmetric ^leq`,
1570    srw_tac [ARITH_ss] [transitive_def, total_def, antisymmetric_def])
1571 end;
1572
1573val length_mergesort = Q.prove
1574(`!l R. LENGTH (mergesort R l) = LENGTH l`,
1575 metis_tac[mergesort_perm,PERM_LENGTH]);
1576
1577val table_to_vec_thm = Q.prove (
1578`!table final_states max_char max_state table' final_states'.
1579  (max_char = alphabet_size) /\
1580  SORTED $<= final_states /\
1581  (!x. MEM x final_states ��� x < max_state) ���
1582  (!n l c. (ALOOKUP table n = SOME l) ��� (?x. ALOOKUP l c = SOME x) ��� c < max_char) ���
1583  PERM (MAP FST table) (COUNT_LIST (LENGTH table)) ���
1584  (table_to_vectors table = table') /\
1585  (accepts_to_vector final_states max_state = final_states')
1586  ���
1587  (LENGTH table' = LENGTH table) ���
1588  (LENGTH final_states' = max_state) ���
1589  (!n. MEM n final_states ��� n < LENGTH final_states' ��� EL n final_states') ���
1590  (!n. (?l. ALOOKUP table n = SOME l) ��� n < LENGTH table') ���
1591  (!n l. n < LENGTH table' ��� (LENGTH (EL n table') = max_char)) ���
1592  (!n c x.
1593    (?l. (ALOOKUP table n = SOME l) ��� (ALOOKUP l c = SOME x)) ���
1594    n < LENGTH table' ��� c < LENGTH (EL n table') ��� (EL c (EL n table') = SOME x))`
1595 ,
1596 simp [good_table_def, table_to_vectors_def,accepts_to_vector_def] >>
1597 rpt gen_tac >>
1598 strip_tac >>
1599(* `FINITE final_states` by metis_tac [finite_bounded] >> *)
1600 qabbrev_tac `sorted_table = mergesort (inv_image $<= FST) table` >>
1601 `LENGTH sorted_table = LENGTH table` by metis_tac [mergesort_perm, PERM_LENGTH] >>
1602 `SORTS mergesort (inv_image $<= (FST : num # (num,'a) alist -> num))`
1603          by metis_tac [mergesort_STABLE_SORT, STABLE_DEF,
1604                        leq_thm, total_inv_image, transitive_inv_image] >>
1605 `MAP FST sorted_table = COUNT_LIST (LENGTH table)`
1606            by (match_mp_tac sorted_perm_count_list >>
1607                fs [SORTS_DEF] >>
1608                rw [] >>
1609                metis_tac [PERM_TRANS, PERM_SYM, PERM_MAP]) >>
1610 simp [] >>
1611 conj_tac
1612 >- metis_tac[mergesort_perm,PERM_LENGTH] >>
1613 conj_tac
1614 >- (`SORTED $<= (MAP FST (MAP (\x.(x,T)) final_states))`
1615            by rw [MAP_MAP_o, combinTheory.o_DEF]
1616      >> metis_tac [alist_to_vec_thm]) >>
1617 conj_tac
1618 >- (`SORTED $<= (MAP FST (MAP (\x.(x,T)) final_states))`
1619            by rw [MAP_MAP_o, combinTheory.o_DEF]
1620     >> `LENGTH (alist_to_vec (MAP (��x. (x,T)) final_states)
1621                              F max_state max_state) = max_state`
1622           by METIS_TAC [alist_to_vec_thm]
1623    >> POP_ASSUM SUBST_ALL_TAC
1624    >> RW_TAC std_ss [EQ_IMP_THM]
1625       >- (`n < max_state` by METIS_TAC [] >>
1626           `ALOOKUP (MAP (��x. (x,T)) final_states) n = SOME T`
1627               by METIS_TAC [vec_lang_lem1] >> METIS_TAC [alist_to_vec_thm])
1628       >- (CCONTR_TAC >> fs []
1629           >> Cases_on `ALOOKUP (MAP (��x. (x,T)) final_states) n`
1630              >- (metis_tac [alist_to_vec_thm, vec_lang_lem1])
1631              >- (NTAC 2 (POP_ASSUM MP_TAC) >> rw [vec_lang_lem1]
1632                  >> DISCH_TAC >> fs[] >> rw[] >> metis_tac [alist_to_vec_thm])))
1633 >>
1634 conj_asm1_tac
1635 >- (rw [] >> eq_tac >> rw []
1636     >- (imp_res_tac alistTheory.ALOOKUP_MEM >>
1637         imp_res_tac MEM_PERM >>
1638         fs [MEM_MAP, MEM_COUNT_LIST] >>
1639         metis_tac [FST,mergesort_perm,PERM_LENGTH])
1640     >- (`ALOOKUP sorted_table n = SOME (EL n (MAP SND sorted_table))`
1641                by metis_tac [dense_alist_to_vec_thm, LENGTH_COUNT_LIST,mergesort_perm,PERM_LENGTH] >>
1642         `ALOOKUP table n = SOME (EL n (MAP SND sorted_table))`
1643                by metis_tac [vec_lang_lem2] >>
1644         rw [])) >>
1645 `SORTS mergesort (inv_image $<= (FST : num # 'a option -> num))`
1646          by metis_tac [mergesort_STABLE_SORT, STABLE_DEF, leq_thm,
1647                        total_inv_image, transitive_inv_image] >>
1648 `(��a b:num. a ��� b) = $<=` by metis_tac [] >> pop_assum SUBST_ALL_TAC >>
1649 simp [EL_MAP,length_mergesort] >>
1650 conj_tac
1651 >- (rw [] >>
1652     `?n' table2'. EL n sorted_table = (n',table2')` by metis_tac [pair_CASES] >>
1653     rw [] >>
1654     qabbrev_tac `sorted_table2 =
1655                  mergesort (inv_image $<= FST) (MAP (��(c,n). (c,SOME n)) table2')` >>
1656     qabbrev_tac `table2 = EL n (MAP SND sorted_table)` >>
1657     `SORTED $<= (MAP FST sorted_table2)`
1658             by (fs [SORTS_DEF, sorted_map, leq_thm] >>
1659                 metis_tac []) >>
1660     metis_tac [alist_to_vec_thm])
1661 >- (* last conjunct *)
1662 (fs [length_mergesort] >>
1663  rw [] >>  eq_tac >>  strip_tac >>
1664  res_tac >>
1665 simp [EL_MAP] >>
1666 `?n' table2'. EL n sorted_table = (n',table2')` by metis_tac [pair_CASES] >>
1667 rw [] >>
1668 qabbrev_tac `sorted_table2 =
1669              mergesort (inv_image $<= FST) (MAP (��(c,n). (c,SOME n)) table2')` >>
1670 qabbrev_tac `table2 = EL n (MAP SND sorted_table)` >>
1671 `SORTED $<= (MAP FST sorted_table2)`
1672         by (fs [SORTS_DEF, sorted_map, leq_thm] >>
1673             metis_tac [])
1674 >- metis_tac [alist_to_vec_thm]
1675 >- (imp_res_tac alist_to_vec_thm >>
1676     fs [AND_IMP_INTRO] >>
1677     FIRST_X_ASSUM match_mp_tac >>
1678     rw [] >>
1679     `table2' = table2`
1680            by (UNABBREV_ALL_TAC >>
1681                rw [EL_MAP]) >>
1682     rw [] >>
1683     `ALOOKUP sorted_table n = SOME (EL n (MAP SND sorted_table))`
1684                by metis_tac [dense_alist_to_vec_thm, LENGTH_COUNT_LIST] >>
1685     `ALOOKUP table n = SOME (EL n (MAP SND sorted_table))`
1686                by metis_tac [vec_lang_lem2] >>
1687     rw [] >>
1688     fs [] >>
1689     `ALOOKUP (MAP (��(c,n). (c,SOME n)) table2) c = SOME (SOME x)`
1690              by metis_tac [vec_lang_lem4, OPTION_MAP_DEF] >>
1691     fs [Once (GSYM vec_lang_lem3)] >>
1692     metis_tac [NOT_SOME_NONE, alist_to_vec_thm])
1693 >- (REV_FULL_SIMP_TAC std_ss [EL_MAP] >>
1694     `table2' = table2`
1695                by (UNABBREV_ALL_TAC >>
1696                    rw [EL_MAP]) >>
1697     fs [] >>
1698     res_tac >>
1699     simp [] >>
1700     Cases_on `ALOOKUP sorted_table2 c`
1701     >- (`ALOOKUP (MAP (��(c,n). (c,SOME n)) table2) c = NONE`
1702                 by metis_tac [vec_lang_lem4, OPTION_MAP_DEF, vec_lang_lem3] >>
1703         metis_tac [NOT_SOME_NONE, alist_to_vec_thm])
1704    >- (`ALOOKUP sorted_table n = SOME (EL n (MAP SND sorted_table))`
1705           by metis_tac [dense_alist_to_vec_thm, LENGTH_COUNT_LIST] >>
1706        `ALOOKUP table n = SOME (EL n (MAP SND sorted_table))`
1707           by metis_tac [vec_lang_lem2] >>
1708        rw [] >>
1709        REV_FULL_SIMP_TAC std_ss [EL_MAP] >>
1710        `SOME x = x'` by metis_tac [SOME_11, alist_to_vec_thm] >>
1711        simp [] >>
1712        qunabbrev_tac `sorted_table2` >>
1713        fs [Once vec_lang_lem3] >>
1714        fs [vec_lang_lem4])))
1715);
1716
1717
1718val Brz_invariant_final = Q.prove (
1719`!seen next_state state_map table.
1720  Brz_invariant seen [] (next_state,state_map,table)
1721  ���
1722  (next_state = LENGTH table) ���
1723  PERM (MAP FST table) (COUNT_LIST (LENGTH table)) ���
1724  (���x. MEM x (get_accepts state_map) ==> x < LENGTH table) ���
1725  (���n l c. (ALOOKUP table n = SOME l) ��� (���x. ALOOKUP l c = SOME x) ��� MEM c ALPHABET)`
1726 ,
1727 simp [Brz_invariant_def,invar_def,set_def,ounion_oempty,union_def] >>
1728 rpt gen_tac >> strip_tac >>
1729 conj_asm1_tac
1730 >- (`!x. x ��� set (MAP FST table) ��� x < next_state`
1731           by (fs [count_def, EXTENSION, range_def, frange_def,dom_def]
1732                >> rw [fdom_oimage_inst,apply_def,oin_fdom]
1733                >> fs [fdom_def,fapply_def]
1734                >> metis_tac [THE_DEF]) >>
1735     `LENGTH table = CARD (set (MAP FST table))`
1736           by metis_tac [LENGTH_MAP,ALL_DISTINCT_CARD_LIST_TO_SET, good_table_def] >>
1737     `fdom num_cmp (oimage num_cmp (apply state_map) seen) = range state_map`
1738       by (simp_tac set_ss [range_def, frange_def] >>
1739           qpat_x_assum `dom _ = dom __` mp_tac >>
1740           rw [fdom_oimage_inst,apply_def, oin_fdom,fapply_def, dom_def] >>
1741           simp_tac set_ss [fdom_def] >> metis_tac[THE_DEF])
1742     >> rw [])
1743>> conj_asm1_tac
1744    >- (match_mp_tac PERM_ALL_DISTINCT
1745          >> rw [all_distinct_count_list, MEM_COUNT_LIST]
1746              >- fs [good_table_def]
1747              >- (`fdom num_cmp (oimage num_cmp (apply state_map) seen) = range state_map`
1748                    by (simp_tac set_ss [range_def, frange_def] >>
1749                        qpat_x_assum `dom _ = dom __` mp_tac >>
1750                        rw [fdom_oimage_inst,apply_def, oin_fdom,fapply_def, dom_def] >>
1751                        simp_tac set_ss [fdom_def] >> metis_tac[THE_DEF])
1752                    >> rw[]))
1753>> conj_tac
1754   >- (rw [mem_get_accepts]
1755      >> `x IN range state_map`
1756            by (simp_tac set_ss [range_def, frange_def] >> metis_tac[])
1757      >> rfs [count_def] )
1758   >- (rw [] >> fs [good_table_def] >> metis_tac [])
1759);
1760
1761val good_vec_def =
1762 Define
1763  `good_vec vec final_states
1764      <=>
1765    (!l c. MEM c ALPHABET ��� MEM l vec ��� c < LENGTH l) /\
1766    (!n1 c n2 l. n1 < LENGTH vec ��� (EL n1 vec = l) ���
1767                 c < LENGTH (EL n1 vec) ���
1768                 (EL c (EL n1 vec) = SOME n2) ==> n2 < LENGTH vec) /\
1769    (LENGTH final_states = LENGTH vec)`;
1770
1771val Brz_inv_to_good_vec = Q.prove
1772(`!seen next_state state_map table.
1773 Brz_invariant seen [] (next_state,state_map,table) ���
1774  (table_to_vectors table = vec) /\
1775  (accepts_to_vector (get_accepts state_map) next_state = final_states)
1776  ==>
1777  (next_state = LENGTH table) ���
1778  good_vec vec final_states ���
1779  (���r n. (lookup regexp_compare r state_map = SOME n) ��� n < LENGTH vec)`
1780 ,
1781 simp [good_vec_def]
1782  >> rpt gen_tac >> strip_tac
1783  >> imp_res_tac Brz_invariant_final
1784  >> fs [Brz_invariant_def,invar_def,
1785        ounion_oempty,union_def, set_def, oset_def]
1786  >>
1787  `(LENGTH vec = LENGTH table) ���
1788   (LENGTH final_states = LENGTH table) ���
1789   (���n. MEM n (get_accepts state_map) ��� n < LENGTH final_states ��� EL n final_states) ���
1790   (���n. (���l. ALOOKUP table n = SOME l) ��� n < LENGTH vec) ���
1791   (!n l. n < LENGTH vec ��� (LENGTH (EL n vec) = alphabet_size)) ���
1792   (���n c x.
1793     (���l. (ALOOKUP table n = SOME l) ��� (ALOOKUP l c = SOME x))
1794     <=>
1795       n < LENGTH vec ��� c < LENGTH (EL n vec) ���
1796       (EL c (EL n vec) = SOME x))`
1797     by
1798      (match_mp_tac table_to_vec_thm
1799        >> rw [sorted_get_accepts]
1800        >> metis_tac [mem_alphabet,alphabet_size_def])
1801  >> rw []
1802 >- (fs [MEM_EL] >> rw []
1803      >> metis_tac [EL_MEM,mem_alphabet,alphabet_size_def])
1804 >- (`!a. a < LENGTH table <=> MEM a (MAP FST table)`
1805        by metis_tac [MEM_PERM,MEM_COUNT_LIST]
1806      >> rw [fdom_oimage_inst,apply_def, oin_fdom,fapply_def,SYM dom_def]
1807      >> rw [dom_def,fdom_def, member_iff_lookup]
1808      >> metis_tac [good_table_def,THE_DEF])
1809 >- (`!a. a < LENGTH table <=> MEM a (MAP FST table)`
1810        by metis_tac [MEM_PERM,MEM_COUNT_LIST]
1811       >> rw [fdom_oimage_inst,apply_def, oin_fdom,fapply_def,SYM dom_def]
1812       >> simp_tac set_ss [dom_def,fdom_def, member_iff_lookup]
1813       >> metis_tac [THE_DEF]));
1814
1815val compile_regexp_good_vec = Q.store_thm
1816("compile_regexp_good_vec",
1817`!r state_map table final_states.
1818  dom_Brz empty [normalize r] (1,singleton (normalize r) 0,[]) /\
1819  (compile_regexp r = (state_map, table, final_states))
1820  ==>
1821  good_vec table final_states ���
1822  (!r n. (lookup regexp_compare r state_map = SOME n) ��� n < LENGTH table) ���
1823  normalize r ��� fdom regexp_compare state_map`
1824 ,
1825 simp [compile_regexp_def]
1826  >> rpt gen_tac >> strip_tac
1827  >> fs []
1828  >> `?seen next_state state_map table.
1829         Brzozowski empty [normalize r] (1,singleton (normalize r) 0,[])
1830          = (seen,next_state,state_map,table)`
1831       by metis_tac [pair_CASES]
1832  >> fs []
1833  >> `Brz_invariant empty [normalize r] (1,singleton (normalize r) 0,[])`
1834     by
1835      (fs [Brz_invariant_def,invar_def,dom_def, normalize_thm, fmap_inj_def, good_table_def,
1836          singleton_thm,empty_def,invariant_def,mem_regexp_def,member_iff_lookup]
1837        >> rw [lookup_def]
1838           >- (fs [fdom_def, singleton_def,lookup_def]
1839                >> BasicProvers.EVERY_CASE_TAC
1840                >> rw []
1841                >> metis_tac [eq_cmp_regexp_compare, eq_cmp_def,normalize_thm])
1842           >- (rw [union_def,set_def,GSYM empty_def, GSYM oempty_def] >>
1843               rw [EXTENSION,fdom_def, osingleton_def, singleton_def, lookup_def] >>
1844               CASE_TAC)
1845           >- (rw [range_def,frange_def, EXTENSION, singleton_def,lookup_def,EQ_IMP_THM]
1846                >- (BasicProvers.EVERY_CASE_TAC >> rw [])
1847                >- (Q.EXISTS_TAC `normalize r` >> rw [regexp_compare_id] >> decide_tac))
1848           >- (rw [GSYM empty_def, GSYM oempty_def]
1849               >> rw [fdom_def, lookup_def, empty_def, oempty_def])
1850           >- (ntac 2 (pop_assum mp_tac)
1851                >> simp_tac set_ss [fdom_def,singleton_def,lookup_def]
1852                >> BasicProvers.EVERY_CASE_TAC
1853                >> rw[]
1854                >> metis_tac [eq_cmp_def, eq_cmp_regexp_compare]))
1855 >> imp_res_tac Brz_inv_thm
1856 >> imp_res_tac Brz_mono
1857 >> imp_res_tac Brz_inv_to_good_vec
1858 >> fs []
1859 >> rw []
1860    >- metis_tac []
1861    >- (`normalize r ��� fdom regexp_compare (singleton (normalize r) 0)`
1862         by rw [submap_def,fdom_def,singleton_def,lookup_def,regexp_compare_id]
1863         >> metis_tac [submap_def])
1864);
1865
1866
1867val vec_lang_correct = Q.prove
1868(`!table final_states max_char vec final_states'.
1869  (max_char = alphabet_size) /\
1870  SORTED $<= final_states /\
1871  (���x. MEM x final_states ��� x < LENGTH table) ���
1872  (���n l c. (ALOOKUP table n = SOME l) ��� (���x. ALOOKUP l c = SOME x) ��� c < max_char) ���
1873  (PERM (MAP FST table) (COUNT_LIST (LENGTH table))) ���
1874  (!n'. n' ��� BIGUNION (IMAGE alist_range (alist_range table)) ��� n' < LENGTH table) ���
1875  (table_to_vectors table = vec) /\
1876  (accepts_to_vector final_states (LENGTH table) = final_states')
1877  ���
1878  !n s. (!c. MEM c s ��� c < max_char) ��� n < LENGTH table
1879       ==>
1880        (table_lang final_states table n s
1881           <=>
1882         exec_dfa (fromList final_states')
1883                  (fromList (MAP fromList vec))
1884                  n
1885                  (MAP CHR s))`
1886 ,
1887 rpt gen_tac
1888  >> strip_tac
1889  >> `(LENGTH vec = LENGTH table) ���
1890      (LENGTH final_states' = LENGTH table) ���
1891      (���n. MEM n final_states ��� n < LENGTH final_states' ��� EL n final_states') ���
1892      (���n. (���l. ALOOKUP table n = SOME l) ��� n < LENGTH vec) ���
1893      (!n l. n < LENGTH vec ��� (LENGTH (EL n vec) = max_char)) ���
1894      (���n c x.
1895        (���l. (ALOOKUP table n = SOME l) ��� (ALOOKUP l c = SOME x))
1896        ���
1897        n < LENGTH vec ��� c < LENGTH (EL n vec) ���
1898        (EL c (EL n vec) = SOME x))`
1899        by (match_mp_tac table_to_vec_thm
1900             >> rw []
1901             >> metis_tac [])
1902  >> Induct_on `s`
1903  >> rw [table_lang_def, exec_dfa_thm,fromList_Vector,sub_def]
1904  >> `h < alphabet_size` by metis_tac []
1905  >> rw [ORD_CHR_lem]
1906  >> BasicProvers.EVERY_CASE_TAC
1907  >> fs []
1908     >- metis_tac [NOT_SOME_NONE]
1909     >- (`n < LENGTH (table_to_vectors table)` by metis_tac[]
1910         >> fs [EL_MAP,sub_def]
1911         >> metis_tac [SOME_11, NOT_SOME_NONE])
1912     >- (`n < LENGTH (table_to_vectors table)` by metis_tac[]
1913         >> fs [EL_MAP,sub_def]
1914         >> metis_tac [NOT_SOME_NONE])
1915     >- (`n < LENGTH (table_to_vectors table)` by metis_tac[]
1916         >> fs [EL_MAP,sub_def]
1917         >> `x' = x''` by metis_tac [SOME_11, NOT_SOME_NONE]
1918          >> rw []
1919          >> fs [fromList_Vector]
1920          >> FIRST_X_ASSUM match_mp_tac
1921          >> FIRST_X_ASSUM match_mp_tac
1922          >> rw [PULL_EXISTS]
1923          >> rw [alistTheory.alist_range_def, EXTENSION]
1924          >> metis_tac [])
1925);
1926
1927
1928val Brz_lang_def = Define `
1929  Brz_lang r =
1930    let (state_map,table_vec,finals_vec) = compile_regexp r in
1931    let tableV = fromList (MAP fromList table_vec) in
1932    let finalsV = fromList finals_vec
1933    in
1934      exec_dfa finalsV tableV (apply state_map (normalize r))`;
1935
1936val string_to_numlist = Q.prove
1937(`!s. (!c. c IN set s ==> ORD c IN set ALPHABET) ==>
1938      ?nl. (s = MAP CHR nl) /\ (!n. n IN set nl ==> n IN set ALPHABET)`,
1939 Induct >> rw []
1940  >> `���nl. (s = MAP CHR nl) ��� ���n. MEM n nl ��� MEM n ALPHABET` by metis_tac[]
1941  >> `?n. (h = CHR n) /\ n < 256` by metis_tac [CHR_ONTO]
1942  >> Q.EXISTS_TAC `n::nl`
1943  >> rw []
1944  >> metis_tac [ORD_CHR_RWT,alphabet_size_def]);
1945
1946
1947val Brzozowski_correct = Q.store_thm
1948("Brzozowski_correct",
1949`!r s.
1950  dom_Brz empty [normalize r] (1,singleton (normalize r) 0,[]) /\
1951  (!c. MEM c s ��� MEM (ORD c) ALPHABET)
1952  ==>
1953  (Brz_lang r s = regexp_lang (normalize r) s)`
1954 ,
1955 rw [GSYM regexp_lang_smart_deriv, Brz_lang_def]
1956  >> UNABBREV_ALL_TAC
1957  >> fs [compile_regexp_def]
1958  >> `?seen next_state state_map table.
1959        Brzozowski empty [normalize r] (1,singleton (normalize r) 0,[])
1960          = (seen,next_state,state_map,table)`
1961       by metis_tac [pair_CASES,SND]
1962  >> fs [LET_THM]
1963  >> rw []
1964  >> `Brz_invariant empty [normalize r] (1,singleton (normalize r) 0,[])`
1965      by (rw_tac set_ss [Brz_invariant_def,invar_def,dom_def, normalize_thm,
1966                      fmap_inj_def, good_table_def, singleton_thm,empty_def,
1967                      invariant_def,mem_regexp_def,member_iff_lookup]
1968           >- fs [lookup_def]
1969           >- (fs [fdom_def, singleton_def,lookup_def]
1970                >> BasicProvers.EVERY_CASE_TAC
1971                >> rw []
1972                >> metis_tac [eq_cmp_regexp_compare, eq_cmp_def,normalize_thm])
1973           >- (rw [union_def,set_def,GSYM empty_def, GSYM oempty_def] >>
1974               rw [EXTENSION,fdom_def, osingleton_def, singleton_def, lookup_def] >>
1975               CASE_TAC)
1976           >- (rw [range_def,frange_def, EXTENSION, singleton_def,lookup_def,EQ_IMP_THM]
1977                >- (BasicProvers.EVERY_CASE_TAC >> rw [])
1978                >- (Q.EXISTS_TAC `normalize r` >> rw [regexp_compare_id] >> decide_tac))
1979           >- (rw [GSYM empty_def, GSYM oempty_def]
1980               >> rw [fdom_def, lookup_def, empty_def, oempty_def])
1981           >- (ntac 2 (pop_assum mp_tac)
1982                >> simp_tac set_ss [fdom_def,singleton_def,lookup_def]
1983                >> BasicProvers.EVERY_CASE_TAC
1984                >> rw[]
1985                >> metis_tac [eq_cmp_def, eq_cmp_regexp_compare])
1986           >- fs [alistTheory.ALOOKUP_def]
1987           >- fs [alistTheory.ALOOKUP_def]
1988           >- fs [alistTheory.ALOOKUP_def])
1989 >> imp_res_tac Brz_inv_thm
1990 >> imp_res_tac Brz_mono
1991 >> imp_res_tac Brz_invariant_final
1992 >> fs [Brz_invariant_def]
1993 >> `smart_deriv_matches (normalize r) s
1994      <=>
1995     table_lang (get_accepts state_map) table (apply state_map (normalize r)) (MAP ORD s)`
1996     by (fs [invar_def,set_def,ounion_oempty,union_def]
1997          >> `fdom num_cmp (oimage num_cmp (apply state_map) seen) = frange regexp_compare state_map`
1998                    by (simp_tac set_ss [frange_def] >>
1999                        qpat_x_assum `dom _ = dom __` mp_tac >>
2000                        rw [fdom_oimage_inst,apply_def, oin_fdom,fapply_def, dom_def] >>
2001                        simp_tac set_ss [fdom_def] >> metis_tac[THE_DEF])
2002          >> mp_tac (table_lang_correct
2003                   |> INST_TYPE [alpha |-> ``:num``]
2004                   |> Q.SPEC `get_accepts state_map`
2005                   |> Q.SPEC `table:(num,(num,num)alist)alist`
2006                   |> Q.SPEC `state_map:(regexp,num)balanced_map`)
2007          >> fs[]
2008          >> `(���n r. (lookup regexp_compare r state_map = SOME n) ���
2009                     (MEM n (get_accepts state_map) ��� nullable r))`
2010               by (rw [mem_get_accepts,EQ_IMP_THM]
2011                    >- (`r' IN fdom regexp_compare state_map` by rw [fdom_def]
2012                         >> metis_tac [fmap_inj_def, eq_cmp_regexp_compare, eq_cmp_def])
2013                    >- metis_tac[])
2014          >> rw []
2015          >> pop_assum mp_tac
2016          >> imp_res_tac string_to_numlist
2017          >> DISCH_THEN (assume_tac o Q.SPECL [`0n`, `normalize r`, `nl`])
2018          >> rfs[]
2019          >> rw[]
2020          >> `normalize r IN fdom regexp_compare (singleton (normalize r) 0)`
2021                by (rw [fdom_def, singleton_def,lookup_def,EQ_IMP_THM]
2022                     >> BasicProvers.EVERY_CASE_TAC
2023                     >> fs [regexp_compare_id])
2024          >> `normalize r IN fdom regexp_compare state_map /\
2025              (lookup regexp_compare (normalize r) (singleton (normalize r) 0) =
2026               lookup regexp_compare (normalize r) state_map)`
2027              by metis_tac [submap_def]
2028          >> fs [singleton_def,lookup_def,regexp_compare_id]
2029          >> qpat_x_assum `table_lang (get_accepts state_map) table 0 nl ���
2030                         smart_deriv_matches (normalize r) (MAP CHR nl)`
2031             (SUBST_ALL_TAC o SYM)
2032          >> rw [MAP_MAP_o,combinTheory.o_DEF,apply_def, fapply_def]
2033          >> pop_assum (SUBST_ALL_TAC o SYM)
2034          >> rw []
2035          >> `MAP (��x. ORD (CHR x)) nl = MAP (\x.x) nl`
2036              by (irule MAP_CONG >> rw [] >> metis_tac [mem_alphabet,ORD_CHR_lem])
2037         >> rw[])
2038 >> rw []
2039 >> imp_res_tac string_to_numlist
2040 >> rw [MAP_MAP_o,combinTheory.o_DEF,apply_def, fapply_def]
2041 >> `MAP (��x. ORD (CHR x)) nl = MAP (\x.x) nl`
2042              by (irule MAP_CONG >> rw [] >> metis_tac [mem_alphabet,ORD_CHR_lem])
2043 >> rw []
2044 >> match_mp_tac (GSYM (SIMP_RULE (srw_ss()) [PULL_FORALL, AND_IMP_INTRO] vec_lang_correct))
2045 >> rw []
2046    >- rw [sorted_get_accepts]
2047    >- metis_tac [mem_alphabet]
2048    >- (fs [good_table_def, alistTheory.alist_range_def]
2049         >> res_tac
2050         >> `n'' IN range state_map`
2051              by (simp_tac set_ss [range_def, frange_def] >> metis_tac[])
2052         >> `n'' IN count (LENGTH table)` by metis_tac [EXTENSION]
2053         >> metis_tac [IN_COUNT])
2054   >- metis_tac [mem_alphabet]
2055   >- (`normalize r IN fdom regexp_compare (singleton (normalize r) 0)`
2056          by (rw [fdom_def, singleton_def,lookup_def,EQ_IMP_THM]
2057               >> BasicProvers.EVERY_CASE_TAC
2058               >> fs [regexp_compare_id])
2059        >> `normalize r IN fdom regexp_compare state_map /\
2060            (lookup regexp_compare (normalize r) state_map =
2061             lookup regexp_compare (normalize r) (singleton (normalize r) 0))`
2062               by metis_tac [submap_def]
2063        >> rw [singleton_def,lookup_def,regexp_compare_id]
2064        >> `0n IN range state_map`
2065              by (simp_tac set_ss [range_def, frange_def]
2066                    >> Q.EXISTS_TAC `normalize r`
2067                    >> rw [singleton_def,lookup_def,EQ_IMP_THM]
2068                    >> CASE_TAC
2069                    >> fs [regexp_compare_id])
2070        >> `0n IN count (LENGTH table)` by metis_tac [EXTENSION]
2071        >> metis_tac [IN_COUNT])
2072);
2073
2074val Brzozowski_correct_again = Q.store_thm
2075("Brzozowski_correct",
2076`!r s.
2077  dom_Brz empty [normalize r] (1,singleton (normalize r) 0,[]) /\
2078  (!c. MEM c s ��� MEM (ORD c) ALPHABET)
2079  ==>
2080  (regexp_matcher r s = regexp_lang r s)`
2081 ,
2082 rw []
2083  >> `Brz_lang r s = regexp_lang r s`
2084      by metis_tac [regexp_lang_normalize,Brzozowski_correct]
2085  >> pop_assum (SUBST_ALL_TAC o SYM)
2086  >> rw [Brz_lang_def, regexp_matcher_def, LET_THM,apply_def,fapply_def]);
2087
2088val Brzozowski_partial_eval = Q.store_thm
2089("Brzozowski_partial_eval",
2090`!r state_numbering delta accepts deltaV acceptsV start_state.
2091  (compile_regexp r = (state_numbering,delta,accepts)) /\
2092  (deltaV = fromList (MAP fromList delta)) /\
2093  (acceptsV = fromList accepts) /\
2094  (lookup regexp_compare (normalize r) state_numbering = SOME start_state) /\
2095  dom_Brz empty [normalize r] (1,singleton (normalize r) 0,[])
2096  ==> !s. EVERY (\c. MEM (ORD c) ALPHABET) s
2097          ==>
2098         (exec_dfa acceptsV deltaV start_state s = regexp_lang r s)`
2099 ,
2100 rw [EVERY_MEM]
2101  >> `regexp_matcher r s = regexp_lang r s`
2102      by metis_tac [Brzozowski_correct_again]
2103  >> pop_assum (SUBST_ALL_TAC o SYM)
2104  >> rw [regexp_matcher_def, LET_THM]);
2105
2106val all_ord_string = Q.prove
2107(`EVERY (\c. MEM (ORD c) ALPHABET) s
2108   <=>
2109  EVERY (\c. ORD c < alphabet_size) s`,
2110 simp_tac list_ss [mem_alphabet_iff]);
2111
2112val Brzozowski_partial_eval_conseq = Q.store_thm
2113("Brzozowski_partial_eval_conseq",
2114 `!r state_numbering delta accepts deltaV acceptsV start_state.
2115  (compile_regexp r = (state_numbering,delta,accepts)) /\
2116  (deltaV = fromList (MAP fromList delta)) /\
2117  (acceptsV = fromList accepts) /\
2118  (lookup regexp_compare (normalize r) state_numbering = SOME start_state) /\
2119  dom_Brz_alt empty [normalize r]
2120  ==>
2121  !s. EVERY (��c. ORD c < alphabet_size) s
2122      ==>
2123      (exec_dfa acceptsV deltaV start_state s = regexp_lang r s)`
2124 ,
2125 metis_tac[Brzozowski_partial_eval,all_ord_string,dom_Brz_alt_equal,all_ord_string]);
2126
2127
2128(*---------------------------------------------------------------------------*)
2129(* Eliminate check that all chars in string are in alphabet. This can be     *)
2130(* be done when alphabet = [0..255]                                          *)
2131(*---------------------------------------------------------------------------*)
2132
2133val Brzozowski_partial_eval_256 = save_thm
2134("Brzozowski_partial_eval_256",
2135  if Regexp_Type.alphabet_size = 256 then
2136   SIMP_RULE list_ss [ORD_BOUND,alphabet_size_def] Brzozowski_partial_eval_conseq
2137  else TRUTH);
2138
2139
2140(* val _ = EmitTeX.tex_theory"-"; *)
2141
2142val _ = export_theory();
2143
2144