1structure regexpSyntax :> regexpSyntax = 2struct 3 4open Feedback Lib HolKernel boolLib 5 Regexp_Type regexpTheory regexp_compilerTheory; 6 7val ERR = mk_HOL_ERR "regexpSyntax"; 8 9val charset_ty = ``:charset``; 10 11val regexp_ty = mk_thy_type{Thy="regexp",Tyop="regexp",Args=[]}; 12 13(*---------------------------------------------------------------------------*) 14(* regexp constructors *) 15(*---------------------------------------------------------------------------*) 16 17fun mkc s = prim_mk_const {Thy = "regexp", Name = s}; 18 19val chset_tm = mkc "Chset" 20val cat_tm = mkc "Cat" 21val star_tm = mkc "Star" 22val or_tm = mkc "Or" 23val neg_tm = mkc "Neg" 24val and_tm = mkc "And" 25val regexp_lang_tm = mkc "regexp_lang" 26val regexp_matcher_tm = 27 prim_mk_const{Thy="regexp_compiler", Name = "regexp_matcher"}; 28 29 30val mk_chset = mk_monop chset_tm; 31val mk_cat = mk_binop cat_tm 32val mk_star = mk_monop star_tm 33val mk_neg = mk_monop neg_tm 34fun mk_or tlist = mk_comb(or_tm,listSyntax.mk_list(tlist,regexp_ty)); 35val mk_and = mk_binop and_tm 36val mk_regexp_lang = mk_monop regexp_lang_tm 37val mk_regexp_matcher = mk_binop regexp_matcher_tm 38 39val dest_chset = dest_monop chset_tm (ERR "dest_chset" "expected a Chset"); 40val dest_cat = dest_binop cat_tm (ERR "dest_cat" "expected a Cat"); 41val dest_star = dest_monop star_tm (ERR "dest_star" "expected a Star"); 42val dest_neg = dest_monop neg_tm (ERR "dest_neg" "expected a Neg"); 43val dest_and = dest_binop and_tm (ERR "dest_and" "expected an And"); 44val dest_regexp_lang = 45 dest_monop regexp_lang_tm 46 (ERR "dest_regexp_lang" "expected regexp_lang application"); 47val dest_regexp_matcher = 48 dest_binop regexp_matcher_tm 49 (ERR "dest_regexp_matcher" "expected an instance of regexp_matcher"); 50 51fun dest_or tm = 52 let val (tlist,ty) = listSyntax.dest_list 53 (dest_monop or_tm (ERR "dest_or" "expected an Or") tm) 54 in if ty = regexp_ty 55 then tlist 56 else raise ERR "dest_or" "expected Or of list of regexps" 57 end; 58 59(* 60val vector_tm = prim_mk_const{Thy="ml_translator",Name="Vector"}; 61val vector_tm = prim_mk_const{Thy="charset",Name="Vector"} 62*) 63 64val vector_tm = prim_mk_const{Thy="regexp_compiler",Name="Vector"} 65 66val mk_vector = mk_monop vector_tm; 67val dest_vector = dest_monop vector_tm (ERR "dest_vector" "expected a Vector"); 68val is_vector = Lib.can dest_vector; 69 70fun list_mk_vector (tlist,ty) = mk_vector(listSyntax.mk_list(tlist,ty)) 71 72fun strip_vector tm = 73 listSyntax.dest_list(dest_vector tm) 74 handle HOL_ERR _ => raise ERR "strip_vector" ""; 75 76val is_chset = Lib.can dest_chset 77val is_cat = Lib.can dest_cat 78val is_star = Lib.can dest_star 79val is_neg = Lib.can dest_neg 80val is_or = Lib.can dest_or 81val is_and = Lib.can dest_and 82val is_regexp_lang = Lib.can dest_regexp_lang 83val is_regexp_matcher = Lib.can dest_regexp_matcher; 84 85 86(*---------------------------------------------------------------------------*) 87(* Maps between ML bitvectors and HOL terms *) 88(* The following commented out functions implement the maps for n-bit words *) 89(* 90val bitvector_to_word = 91 let val num_of = Vector.foldr(fn (b,n) => if b then 2 * n + 1 else 2 * n) 0 92 in 93 fn bv => wordsSyntax.mk_wordii(num_of bv,Regexp_Type.alphabet_size) 94 end; 95 96fun word_to_bitvector tm = 97 let val n = wordsSyntax.dest_word_literal tm 98 val A = Array.tabulate(Regexp_Type.alphabet_size, K false) 99 open Arbnum Array 100 val _ = foldli (fn (i,e,n) => 101 let val (d,m) = divmod(n,two) in (update(A,i,m=one); d) end) n A 102 in vector A 103 end 104 105The following functions implement the maps for bool vectors 106 107fun bitvector_to_bitlist bv = 108 let open boolSyntax 109 in Vector.foldr(fn (b,list) => 110 listSyntax.mk_cons(if b then T else F,list)) 111 (listSyntax.mk_nil Type.bool) bv 112 end; 113 114fun bitlist_to_bitvector tm = 115 let val (tmlist,ty) = listSyntax.dest_list tm 116 val blist = List.map (equal boolSyntax.T) tmlist 117 in Vector.fromList blist 118 end 119 120The following functions implement the maps for bool vectors 121 122fun bitvector_to_bitlist bv = 123 let open boolSyntax 124 fun bit ch = (ch = #"1") 125 fun bin i = IntInf.fmt StringCvt.BIN i 126 val ml_bitlist = map bit (explode (bin bv)) 127 in 128 List.foldr(fn (b,list) => 129 listSyntax.mk_cons(if b then T else F,list)) 130 (listSyntax.mk_nil Type.bool) ml_bitlist 131 end; 132 133fun bitlist_to_bitvector tm = 134 let val (tmlist,ty) = listSyntax.dest_list tm 135 fun binchar tm = 136 if tm = T then #"1" else 137 if tm = F then #"0" 138 else raise ERR "bitlist_to_bitvector" "expected boolean literal" 139 val blist = List.map binchar tmlist 140 fun list_reader (h::t) = SOME(h,t) 141 | list_reader [] = NONE 142 in 143 case IntInf.scan StringCvt.BIN list_reader blist 144 of SOME (i,[]) => i 145 | otherwise => raise ERR "bitlist_to_bitvector" "unexpected input" 146 end 147*) 148 149val charset_tm = prim_mk_const{Thy="charset",Name="Charset"} 150 151(*---------------------------------------------------------------------------*) 152(* Lift and drop charsets between ML and HOL *) 153(*---------------------------------------------------------------------------*) 154(* 155val (chop4,join4) = 156 let open IntInf 157 val exp2_64 = pow(2,64) 158 val exp2_128 = pow(2,128) 159 val exp2_192 = pow(2,192) 160 val exp2_256 = pow(2,256) 161 fun chop4 i = 162 let val (a,b) = divMod(i,exp2_64) 163 val (c,d) = divMod(a,exp2_64) 164 val (e,f) = divMod(c,exp2_64) 165 val (g,h) = divMod(e,exp2_64) 166 in (h,f,d,b) 167 end 168 fun join4 (h,f,d,b) = b + d*exp2_64 + f*exp2_128 + h*exp2_192 169 in 170 (chop4,join4) 171 end; 172 173 let val num = IntInf.toInt 174 in fn cset => 175 let open wordsSyntax 176 val (h,f,d,b) = chop4 cset 177 val htm = mk_wordii(num h,64) 178 val ftm = mk_wordii(num f,64) 179 val dtm = mk_wordii(num d,64) 180 val btm = mk_wordii(num b,64) 181 in list_mk_comb(charset_tm,[htm,ftm,dtm,btm]) 182 end 183 end; 184 185fun term_to_charset tm = (* ``:charset`` -> IntInf.int *) 186 case strip_comb tm 187 of (const,[htm,ftm,dtm,btm]) => 188 if same_const const charset_tm 189 then let open wordsSyntax 190 val inf = IntInf.fromInt 191 val h = inf (uint_of_word htm) 192 val f = inf (uint_of_word ftm) 193 val d = inf (uint_of_word dtm) 194 val b = inf (uint_of_word btm) 195 in join4(h,f,d,b) 196 end 197 else raise ERR "term_to_charset" "expected Charset _ _ _ _" 198 | other => raise ERR "term_to_charset" "expected Charset _ _ _ _" 199*) 200 201val charset_to_term = (* w64*w64*w64*w64 -> ``:charset`` *) 202 let val num = Arbnum.fromLargeInt o Word64.toLargeInt 203 in fn (v1,v2,v3,v4) => 204 let open wordsSyntax 205 val v1tm = mk_wordi(num v1,64) 206 val v2tm = mk_wordi(num v2,64) 207 val v3tm = mk_wordi(num v3,64) 208 val v4tm = mk_wordi(num v4,64) 209 in list_mk_comb(charset_tm,[v1tm,v2tm,v3tm,v4tm]) 210 end 211 end; 212 213fun term_to_charset tm = (* ``:charset`` -> w64*w64*w64*w64 *) 214 case strip_comb tm 215 of (const,[v1tm,v2tm,v3tm,v4tm]) => 216 if same_const const charset_tm 217 then let open wordsSyntax 218 val inf = Word64.fromLargeInt o Arbnum.toLargeInt 219 val v1 = inf (dest_word_literal v1tm) 220 val v2 = inf (dest_word_literal v2tm) 221 val v3 = inf (dest_word_literal v3tm) 222 val v4 = inf (dest_word_literal v4tm) 223 in (v1,v2,v3,v4) 224 end 225 else raise ERR "term_to_charset" "expected Charset _ _ _ _" 226 | other => raise ERR "term_to_charset" "expected Charset _ _ _ _" 227 228(*---------------------------------------------------------------------------*) 229(* Build a regexp term from an ML regexp expression *) 230(*---------------------------------------------------------------------------*) 231 232fun regexp_to_term r = 233 case r 234 of Chset bv => mk_chset(charset_to_term bv) 235 | Cat (r1,r2) => mk_cat(regexp_to_term r1,regexp_to_term r2) 236 | Star r => mk_star (regexp_to_term r) 237 | Or rlist => mk_or (List.map regexp_to_term rlist) 238 | Neg r => mk_neg (regexp_to_term r); 239 240fun term_to_regexp tm = 241 (case total dest_chset tm 242 of SOME w => Chset (term_to_charset w) 243 | NONE => 244 case total dest_cat tm 245 of SOME (t1,t2) => Cat(term_to_regexp t1,term_to_regexp t2) 246 | NONE => 247 case total dest_star tm 248 of SOME t => Star (term_to_regexp t) 249 | NONE => 250 case total dest_or tm 251 of SOME tlist => Or (List.map term_to_regexp tlist) 252 | NONE => 253 case total dest_neg tm 254 of SOME t => Neg (term_to_regexp t) 255 | NONE => raise ERR "term_to_regexp" "not a ground regexp term") 256 handle e => raise wrap_exn "regexpSyntax" "term_to_regexp" e; 257 258(*---------------------------------------------------------------------------*) 259(* Derived syntax *) 260(*---------------------------------------------------------------------------*) 261 262fun mkc s = prim_mk_const {Thy = "charset", Name = s}; 263 264val charset_empty_tm = mkc "charset_empty"; 265val charset_full_tm = mkc "charset_full"; 266val empty_tm = mk_chset charset_empty_tm; 267val dot_tm = mk_chset charset_full_tm; 268 269val epsilon_tm = mk_star empty_tm; 270val dot_star_tm = mk_star dot_tm; 271 272end 273