1structure regexpLib :> regexpLib =
2struct
3
4open HolKernel boolLib bossLib;
5open pairLib optionLib pred_setLib listLib stringLib wordsLib;
6open listTheory stringTheory arithmeticTheory pred_setTheory
7     sortingTheory mergesortTheory comparisonTheory balanced_mapTheory
8     vec_mapTheory charsetTheory
9     regexpTheory regexp_compilerTheory regexp_parserTheory;
10
11open Regexp_Type regexpSyntax regexpMisc;
12
13fun sml_matcher r =
14 let val {matchfn,start,table,final} = Regexp_Match.vector_matcher r
15     val _ = stdErr_print (Int.toString(Vector.length final)^" states.\n")
16 in {certificate = NONE:thm option,
17     matchfn = matchfn,
18     table = table,
19     start = start,
20     final = final}
21 end;
22
23(*---------------------------------------------------------------------------*)
24(* Proof-based compilation of regexps into DFAs, and associated regexp       *)
25(* matching.                                                                 *)
26(*---------------------------------------------------------------------------*)
27
28val numeral_cmp_thm = Q.prove
29(`(num_cmp (NUMERAL x) (NUMERAL y) = num_cmp x y) /\
30  (num_cmp (NUMERAL x) y = num_cmp x y) /\
31  (num_cmp x (NUMERAL y) = num_cmp x y) /\
32  (num_cmp 0 n = num_cmp ZERO n) /\
33  (num_cmp n 0 = num_cmp n ZERO) /\
34  (num_cmp ZERO ZERO = Equal) ���
35  (num_cmp ZERO (BIT1 y) = Less) ���
36  (num_cmp ZERO (BIT2 y) = Less) ���
37  (num_cmp (BIT1 x) ZERO = Greater) ���
38  (num_cmp (BIT2 x) ZERO = Greater) ���
39  (num_cmp (BIT1 x) (BIT1 y) = num_cmp x y) ���
40  (num_cmp (BIT2 x) (BIT2 y) = num_cmp x y) ���
41  (num_cmp (BIT1 x) (BIT2 y) = case num_cmp x y of Greater => Greater | _ => Less) ���
42  (num_cmp (BIT2 x) (BIT1 y) = case num_cmp x y of Less => Less | _ => Greater)`,
43 METIS_TAC [arithmeticTheory.NUMERAL_DEF,comparisonTheory.num_cmp_numOrd,
44            totoTheory.numeralOrd,arithmeticTheory.ALT_ZERO]);
45
46val vector_defs =
47 let (* open ml_translatorTheory *)
48 in [fromList_def, sub_def, length_def]
49 end;
50
51val regexp_compute_thms =
52  vector_defs
53  @
54  [ALPHABET_def, alphabet_size_def, And_def,
55   charset_empty_def, charset_full_def,
56   words4_bit_def, charset_mem_def, charset_union_def,
57   charset_sing_def, merge_charsets_def,
58
59   charset_cmp_def, numeral_cmp_thm, len_cmp_def,
60   regexp_compare_def,regexp_compareW_def,regexp_compare_eq,regexp_leq_def,
61
62   build_char_set_def, Sigma_def, Empty_def, Epsilon_def, catstring_def,
63   assoc_cat_def, build_cat_def, build_neg_def, build_star_def,
64   flatten_or_def, remove_dups_def, build_or_def,
65   nullable_def, nullableW_def, smart_deriv_thm, (* smart_deriv_def *) normalize_def,
66   transitions_def, build_table_def, extend_states_def,
67   insert_regexp_def, mem_regexp_def, relationTheory.inv_image_def,
68
69   dom_Brz_alt_eqns,SIMP_RULE bool_ss [dom_Brz_alt_equal] exec_Brz_def,
70   Brzozo_def, Brzozowski_exec_Brz, MAXNUM_32_def,
71   get_accepts_def, numLib.SUC_RULE (vec_mapTheory.alist_to_vec_def),
72   accepts_to_vector_def, table_to_vectors_def,
73   compile_regexp_def, exec_dfa_def, regexp_matcher_def,
74
75   mergesort_def, mergesortN_def,sort2_def,sort3_def,merge_def,
76   balanced_mapTheory.empty_def,
77   balanced_mapTheory.member_def,
78   balanced_mapTheory.insert_def,
79   balanced_mapTheory.lookup_def,
80   balanced_mapTheory.singleton_def,
81   balanced_mapTheory.balanceL_def,
82   balanced_mapTheory.balanceR_def,
83   balanced_mapTheory.ratio_def,
84   balanced_mapTheory.size_def,
85   balanced_mapTheory.delta_def,
86   balanced_mapTheory.foldrWithKey_def,
87   (* adding stuff revealed by computeLib.unmapped *)
88   combinTheory.o_DEF, combinTheory.I_THM, combinTheory.C_DEF, combinTheory.FAIL_DEF,
89   rich_listTheory.SPLITP_AUX_def, rich_listTheory.SPLITP_compute,rich_listTheory.SEG_compute
90 ];
91
92(*---------------------------------------------------------------------------*)
93(* The complete compset                                                      *)
94(*---------------------------------------------------------------------------*)
95
96fun regexp_compset() =
97 let open computeLib
98     val compset = listLib.list_compset()
99     val _ = optionLib.OPTION_rws compset
100     val _ = pairLib.add_pair_compset compset
101     val _ = pred_setLib.add_pred_set_compset compset
102     val _ = wordsLib.add_words_compset true compset
103     val _ = stringLib.add_string_compset compset
104     val _ = add_datatype_info compset (valOf(TypeBase.fetch ``:ordering``))
105     val _ = add_datatype_info compset (valOf(TypeBase.fetch ``:('a,'b)balanced_map``))
106     val _ = add_datatype_info compset (valOf(TypeBase.fetch ``:charset``))
107     val _ = add_datatype_info compset (valOf(TypeBase.fetch ``:regexp``))
108     val _ = add_thms regexp_compute_thms compset
109 in
110   compset
111 end;
112
113(*---------------------------------------------------------------------------*)
114(* Evaluator based on the compset                                            *)
115(*---------------------------------------------------------------------------*)
116
117val compset = regexp_compset();
118
119val check_these_consts = computeLib.unmapped compset;
120
121val regexpEval = computeLib.CBV_CONV compset;
122
123(*
124max_print_depth := 25;
125
126fun compile q =
127  regexpEval ``compile_regexp ^(mk_regexp(Regexp_Type.fromQuote q))``;
128
129val regexp_tm = mk_regexp(Regexp_Type.fromQuote `a`);
130*)
131
132val Brzozowski_partial_eval_alt =
133   SIMP_RULE bool_ss [dom_Brz_alt_equal] Brzozowski_partial_eval;
134
135fun hol_matcher r =
136 let open listSyntax regexpSyntax
137     val _ = stdErr_print "Compiling regexp to DFA by deduction (can be slow) :\n"
138     val regexp_tm = regexpSyntax.mk_regexp r
139     val compile_thm = Count.apply regexpEval ``regexp_compiler$compile_regexp ^regexp_tm``
140     val triple = rhs (concl compile_thm)
141     val [t1,t2,t3] = strip_pair triple
142     val start_state_thm = regexpEval ``lookup regexp_compare (normalize ^regexp_tm) ^t1``
143     val dom_Brz_thm = EQT_ELIM (Count.apply regexpEval
144                      ``dom_Brz_alt empty [normalize ^regexp_tm]``)
145     val hyps_thm = LIST_CONJ [compile_thm, start_state_thm,dom_Brz_thm]
146     val thm = SIMP_RULE list_ss [fromList_Vector,ORD_BOUND,alphabet_size_def]
147                       (SPEC regexp_tm Brzozowski_partial_eval_conseq)
148     val dfa_thm = MATCH_MP thm hyps_thm
149     fun match_string_by_proof s =
150      let val stm = stringLib.fromMLstring s
151          val dfa_thm1 = SPEC stm dfa_thm
152       (* val (string_ok_tm,_) = dest_imp (concl dfa_thm1)
153          val _ = stdErr_print "Checking input string is in alphabet ... "
154          val string_ok_thm = EQT_ELIM(regexpEval string_ok_tm)
155          val _ = stdErr_print "it is.\n"
156          val dfa_thm2 = MP dfa_thm1 string_ok_thm
157        *)
158          val dfa_thm2 = dfa_thm1
159          val _ = stdErr_print ("Running DFA on string: "^Lib.quote s^" ... ")
160          val dfa_exec_thm = regexpEval (lhs(concl dfa_thm2))
161          val verdict = (boolSyntax.T = rhs(concl dfa_exec_thm))
162          val _ = if verdict then stdErr_print "accepted.\n" else stdErr_print "rejected.\n"
163      in
164        verdict
165      end
166     val eq_tm = snd(strip_forall (concl dfa_thm))
167     val (_,[final,table,start,_]) = strip_comb(boolSyntax.lhs eq_tm)
168     val ifinal = List.map (equal boolSyntax.T)
169                  (fst(listSyntax.dest_list (dest_vector final)))
170     val istart = numSyntax.int_of_term start
171     val rows1 = dest_vector table
172     fun dest_row row =
173        let val opts = fst (listSyntax.dest_list row)
174        in List.map (numSyntax.int_of_term o optionSyntax.dest_some) opts
175        end
176     val rows2 = fst(listSyntax.dest_list(snd(listSyntax.dest_map rows1)))
177     val itable = List.map dest_row rows2
178     val len = length ifinal
179     val _ = stdErr_print (Int.toString len^" states.\n")
180 in
181     {certificate = SOME dfa_thm,
182      matchfn = match_string_by_proof,
183      table = Vector.fromList (map Vector.fromList itable),
184      start = istart,
185      final = Vector.fromList ifinal}
186 end;
187
188
189datatype evaluator = HOL | SML ;
190
191fun matcher HOL = hol_matcher
192  | matcher SML = sml_matcher;
193
194fun dfa_by_proof (name,r) =
195 let val name = if Lexis.ok_identifier name then name
196               else (HOL_MESG (Lib.quote name^
197                       " is not a suitable identifier, using \"foo\" instead");
198                     "foo")
199     val {certificate, start,table,final,matchfn} = hol_matcher r
200     val SOME thm = certificate
201     val eqn = snd(dest_forall(concl thm))
202     val (exec_dfa,[finals,table,start,s]) = strip_comb(lhs eqn)
203     val finals_var = mk_var(name^"_finals",type_of finals)
204     val table_var = mk_var(name^"_table",type_of table)
205     val start_var = mk_var(name^"_start",type_of start)
206     val finals_def = Define `^finals_var = ^finals`
207     val table_def = Define `^table_var = ^table`
208     val start_def = Define `^start_var = ^start`
209     val thm' = CONV_RULE (BINDER_CONV
210                  (LHS_CONV (REWRITE_CONV [GSYM finals_def, GSYM table_def, GSYM start_def])))
211                  thm
212     val thm'' = LIST_CONJ [thm',table_def, finals_def, start_def]
213 in
214   save_thm(name^"_regexp_compilation",thm'')
215 end;
216
217
218end (* regexpLib *)
219