1structure regexpLib :> regexpLib = 2struct 3 4open HolKernel boolLib bossLib; 5open pairLib optionLib pred_setLib listLib stringLib wordsLib; 6open listTheory stringTheory arithmeticTheory pred_setTheory 7 sortingTheory mergesortTheory comparisonTheory balanced_mapTheory 8 vec_mapTheory charsetTheory 9 regexpTheory regexp_compilerTheory regexp_parserTheory; 10 11open Regexp_Type regexpSyntax regexpMisc; 12 13fun sml_matcher r = 14 let val {matchfn,start,table,final} = Regexp_Match.vector_matcher r 15 val _ = stdErr_print (Int.toString(Vector.length final)^" states.\n") 16 in {certificate = NONE:thm option, 17 matchfn = matchfn, 18 table = table, 19 start = start, 20 final = final} 21 end; 22 23(*---------------------------------------------------------------------------*) 24(* Proof-based compilation of regexps into DFAs, and associated regexp *) 25(* matching. *) 26(*---------------------------------------------------------------------------*) 27 28val numeral_cmp_thm = Q.prove 29(`(num_cmp (NUMERAL x) (NUMERAL y) = num_cmp x y) /\ 30 (num_cmp (NUMERAL x) y = num_cmp x y) /\ 31 (num_cmp x (NUMERAL y) = num_cmp x y) /\ 32 (num_cmp 0 n = num_cmp ZERO n) /\ 33 (num_cmp n 0 = num_cmp n ZERO) /\ 34 (num_cmp ZERO ZERO = Equal) ��� 35 (num_cmp ZERO (BIT1 y) = Less) ��� 36 (num_cmp ZERO (BIT2 y) = Less) ��� 37 (num_cmp (BIT1 x) ZERO = Greater) ��� 38 (num_cmp (BIT2 x) ZERO = Greater) ��� 39 (num_cmp (BIT1 x) (BIT1 y) = num_cmp x y) ��� 40 (num_cmp (BIT2 x) (BIT2 y) = num_cmp x y) ��� 41 (num_cmp (BIT1 x) (BIT2 y) = case num_cmp x y of Greater => Greater | _ => Less) ��� 42 (num_cmp (BIT2 x) (BIT1 y) = case num_cmp x y of Less => Less | _ => Greater)`, 43 METIS_TAC [arithmeticTheory.NUMERAL_DEF,comparisonTheory.num_cmp_numOrd, 44 totoTheory.numeralOrd,arithmeticTheory.ALT_ZERO]); 45 46val vector_defs = 47 let (* open ml_translatorTheory *) 48 in [fromList_def, sub_def, length_def] 49 end; 50 51val regexp_compute_thms = 52 vector_defs 53 @ 54 [ALPHABET_def, alphabet_size_def, And_def, 55 charset_empty_def, charset_full_def, 56 words4_bit_def, charset_mem_def, charset_union_def, 57 charset_sing_def, merge_charsets_def, 58 59 charset_cmp_def, numeral_cmp_thm, len_cmp_def, 60 regexp_compare_def,regexp_compareW_def,regexp_compare_eq,regexp_leq_def, 61 62 build_char_set_def, Sigma_def, Empty_def, Epsilon_def, catstring_def, 63 assoc_cat_def, build_cat_def, build_neg_def, build_star_def, 64 flatten_or_def, remove_dups_def, build_or_def, 65 nullable_def, nullableW_def, smart_deriv_thm, (* smart_deriv_def *) normalize_def, 66 transitions_def, build_table_def, extend_states_def, 67 insert_regexp_def, mem_regexp_def, relationTheory.inv_image_def, 68 69 dom_Brz_alt_eqns,SIMP_RULE bool_ss [dom_Brz_alt_equal] exec_Brz_def, 70 Brzozo_def, Brzozowski_exec_Brz, MAXNUM_32_def, 71 get_accepts_def, numLib.SUC_RULE (vec_mapTheory.alist_to_vec_def), 72 accepts_to_vector_def, table_to_vectors_def, 73 compile_regexp_def, exec_dfa_def, regexp_matcher_def, 74 75 mergesort_def, mergesortN_def,sort2_def,sort3_def,merge_def, 76 balanced_mapTheory.empty_def, 77 balanced_mapTheory.member_def, 78 balanced_mapTheory.insert_def, 79 balanced_mapTheory.lookup_def, 80 balanced_mapTheory.singleton_def, 81 balanced_mapTheory.balanceL_def, 82 balanced_mapTheory.balanceR_def, 83 balanced_mapTheory.ratio_def, 84 balanced_mapTheory.size_def, 85 balanced_mapTheory.delta_def, 86 balanced_mapTheory.foldrWithKey_def, 87 (* adding stuff revealed by computeLib.unmapped *) 88 combinTheory.o_DEF, combinTheory.I_THM, combinTheory.C_DEF, combinTheory.FAIL_DEF, 89 rich_listTheory.SPLITP_AUX_def, rich_listTheory.SPLITP_compute,rich_listTheory.SEG_compute 90 ]; 91 92(*---------------------------------------------------------------------------*) 93(* The complete compset *) 94(*---------------------------------------------------------------------------*) 95 96fun regexp_compset() = 97 let open computeLib 98 val compset = listLib.list_compset() 99 val _ = optionLib.OPTION_rws compset 100 val _ = pairLib.add_pair_compset compset 101 val _ = pred_setLib.add_pred_set_compset compset 102 val _ = wordsLib.add_words_compset true compset 103 val _ = stringLib.add_string_compset compset 104 val _ = add_datatype_info compset (valOf(TypeBase.fetch ``:ordering``)) 105 val _ = add_datatype_info compset (valOf(TypeBase.fetch ``:('a,'b)balanced_map``)) 106 val _ = add_datatype_info compset (valOf(TypeBase.fetch ``:charset``)) 107 val _ = add_datatype_info compset (valOf(TypeBase.fetch ``:regexp``)) 108 val _ = add_thms regexp_compute_thms compset 109 in 110 compset 111 end; 112 113(*---------------------------------------------------------------------------*) 114(* Evaluator based on the compset *) 115(*---------------------------------------------------------------------------*) 116 117val compset = regexp_compset(); 118 119val check_these_consts = computeLib.unmapped compset; 120 121val regexpEval = computeLib.CBV_CONV compset; 122 123(* 124max_print_depth := 25; 125 126fun compile q = 127 regexpEval ``compile_regexp ^(mk_regexp(Regexp_Type.fromQuote q))``; 128 129val regexp_tm = mk_regexp(Regexp_Type.fromQuote `a`); 130*) 131 132val Brzozowski_partial_eval_alt = 133 SIMP_RULE bool_ss [dom_Brz_alt_equal] Brzozowski_partial_eval; 134 135fun hol_matcher r = 136 let open listSyntax regexpSyntax 137 val _ = stdErr_print "Compiling regexp to DFA by deduction (can be slow) :\n" 138 val regexp_tm = regexpSyntax.mk_regexp r 139 val compile_thm = Count.apply regexpEval ``regexp_compiler$compile_regexp ^regexp_tm`` 140 val triple = rhs (concl compile_thm) 141 val [t1,t2,t3] = strip_pair triple 142 val start_state_thm = regexpEval ``lookup regexp_compare (normalize ^regexp_tm) ^t1`` 143 val dom_Brz_thm = EQT_ELIM (Count.apply regexpEval 144 ``dom_Brz_alt empty [normalize ^regexp_tm]``) 145 val hyps_thm = LIST_CONJ [compile_thm, start_state_thm,dom_Brz_thm] 146 val thm = SIMP_RULE list_ss [fromList_Vector,ORD_BOUND,alphabet_size_def] 147 (SPEC regexp_tm Brzozowski_partial_eval_conseq) 148 val dfa_thm = MATCH_MP thm hyps_thm 149 fun match_string_by_proof s = 150 let val stm = stringLib.fromMLstring s 151 val dfa_thm1 = SPEC stm dfa_thm 152 (* val (string_ok_tm,_) = dest_imp (concl dfa_thm1) 153 val _ = stdErr_print "Checking input string is in alphabet ... " 154 val string_ok_thm = EQT_ELIM(regexpEval string_ok_tm) 155 val _ = stdErr_print "it is.\n" 156 val dfa_thm2 = MP dfa_thm1 string_ok_thm 157 *) 158 val dfa_thm2 = dfa_thm1 159 val _ = stdErr_print ("Running DFA on string: "^Lib.quote s^" ... ") 160 val dfa_exec_thm = regexpEval (lhs(concl dfa_thm2)) 161 val verdict = (boolSyntax.T = rhs(concl dfa_exec_thm)) 162 val _ = if verdict then stdErr_print "accepted.\n" else stdErr_print "rejected.\n" 163 in 164 verdict 165 end 166 val eq_tm = snd(strip_forall (concl dfa_thm)) 167 val (_,[final,table,start,_]) = strip_comb(boolSyntax.lhs eq_tm) 168 val ifinal = List.map (equal boolSyntax.T) 169 (fst(listSyntax.dest_list (dest_vector final))) 170 val istart = numSyntax.int_of_term start 171 val rows1 = dest_vector table 172 fun dest_row row = 173 let val opts = fst (listSyntax.dest_list row) 174 in List.map (numSyntax.int_of_term o optionSyntax.dest_some) opts 175 end 176 val rows2 = fst(listSyntax.dest_list(snd(listSyntax.dest_map rows1))) 177 val itable = List.map dest_row rows2 178 val len = length ifinal 179 val _ = stdErr_print (Int.toString len^" states.\n") 180 in 181 {certificate = SOME dfa_thm, 182 matchfn = match_string_by_proof, 183 table = Vector.fromList (map Vector.fromList itable), 184 start = istart, 185 final = Vector.fromList ifinal} 186 end; 187 188 189datatype evaluator = HOL | SML ; 190 191fun matcher HOL = hol_matcher 192 | matcher SML = sml_matcher; 193 194fun dfa_by_proof (name,r) = 195 let val name = if Lexis.ok_identifier name then name 196 else (HOL_MESG (Lib.quote name^ 197 " is not a suitable identifier, using \"foo\" instead"); 198 "foo") 199 val {certificate, start,table,final,matchfn} = hol_matcher r 200 val SOME thm = certificate 201 val eqn = snd(dest_forall(concl thm)) 202 val (exec_dfa,[finals,table,start,s]) = strip_comb(lhs eqn) 203 val finals_var = mk_var(name^"_finals",type_of finals) 204 val table_var = mk_var(name^"_table",type_of table) 205 val start_var = mk_var(name^"_start",type_of start) 206 val finals_def = Define `^finals_var = ^finals` 207 val table_def = Define `^table_var = ^table` 208 val start_def = Define `^start_var = ^start` 209 val thm' = CONV_RULE (BINDER_CONV 210 (LHS_CONV (REWRITE_CONV [GSYM finals_def, GSYM table_def, GSYM start_def]))) 211 thm 212 val thm'' = LIST_CONJ [thm',table_def, finals_def, start_def] 213 in 214 save_thm(name^"_regexp_compilation",thm'') 215 end; 216 217 218end (* regexpLib *) 219