1structure regexpSyntax :> regexpSyntax = 2struct 3 4open Feedback Lib HolKernel boolLib 5 Regexp_Type regexpTheory regexp_compilerTheory; 6 7val ERR = mk_HOL_ERR "regexpSyntax"; 8 9val charset_ty = ``:charset``; 10 11val regexp_ty = mk_thy_type{Thy="regexp",Tyop="regexp",Args=[]}; 12 13(*---------------------------------------------------------------------------*) 14(* regexp constructors *) 15(*---------------------------------------------------------------------------*) 16 17fun mkc s = prim_mk_const {Thy = "regexp", Name = s}; 18 19val chset_tm = mkc "Chset" 20val cat_tm = mkc "Cat" 21val star_tm = mkc "Star" 22val or_tm = mkc "Or" 23val neg_tm = mkc "Neg" 24val and_tm = mkc "And" 25val regexp_matcher_tm = 26 prim_mk_const{Thy="regexp_compiler", Name = "regexp_matcher"}; 27 28 29val mk_chset = mk_monop chset_tm; 30val mk_cat = mk_binop cat_tm 31val mk_star = mk_monop star_tm 32val mk_neg = mk_monop neg_tm 33fun mk_or tlist = mk_comb(or_tm,listSyntax.mk_list(tlist,regexp_ty)); 34val mk_and = mk_binop and_tm 35val mk_regexp_matcher = mk_binop regexp_matcher_tm 36 37val dest_chset = dest_monop chset_tm (ERR "dest_chset" "expected a Chset"); 38val dest_cat = dest_binop cat_tm (ERR "dest_cat" "expected a Cat"); 39val dest_star = dest_monop star_tm (ERR "dest_star" "expected a Star"); 40val dest_neg = dest_monop neg_tm (ERR "dest_neg" "expected a Neg"); 41val dest_and = dest_binop and_tm (ERR "dest_and" "expected an And"); 42val dest_regexp_matcher = dest_binop regexp_matcher_tm 43 (ERR "dest_regexp_matcher" "expected an instance of regexp_matcher"); 44 45fun dest_or tm = 46 let val (tlist,ty) = listSyntax.dest_list 47 (dest_monop or_tm (ERR "dest_or" "expected an Or") tm) 48 in if ty = regexp_ty 49 then tlist 50 else raise ERR "dest_or" "expected Or of list of regexps" 51 end; 52 53(* 54val vector_tm = prim_mk_const{Thy="ml_translator",Name="Vector"}; 55val vector_tm = prim_mk_const{Thy="charset",Name="Vector"} 56*) 57 58val vector_tm = prim_mk_const{Thy="regexp_compiler",Name="Vector"} 59 60val mk_vector = mk_monop vector_tm; 61val dest_vector = dest_monop vector_tm (ERR "dest_vector" "expected a Vector"); 62val is_vector = Lib.can dest_vector; 63 64fun list_mk_vector (tlist,ty) = mk_vector(listSyntax.mk_list(tlist,ty)) 65 66fun strip_vector tm = 67 listSyntax.dest_list(dest_vector tm) 68 handle HOL_ERR _ => raise ERR "strip_vector" ""; 69 70val is_chset = Lib.can dest_chset 71val is_cat = Lib.can dest_cat 72val is_star = Lib.can dest_star 73val is_neg = Lib.can dest_neg 74val is_or = Lib.can dest_or 75val is_and = Lib.can dest_and 76val is_regexp_matcher = Lib.can dest_regexp_matcher; 77 78 79(*---------------------------------------------------------------------------*) 80(* Maps between ML bitvectors and HOL terms *) 81(* The following commented out functions implement the maps for n-bit words *) 82(* 83val bitvector_to_word = 84 let val num_of = Vector.foldr(fn (b,n) => if b then 2 * n + 1 else 2 * n) 0 85 in 86 fn bv => wordsSyntax.mk_wordii(num_of bv,Regexp_Type.alphabet_size) 87 end; 88 89fun word_to_bitvector tm = 90 let val n = wordsSyntax.dest_word_literal tm 91 val A = Array.tabulate(Regexp_Type.alphabet_size, K false) 92 open Arbnum Array 93 val _ = foldli (fn (i,e,n) => 94 let val (d,m) = divmod(n,two) in (update(A,i,m=one); d) end) n A 95 in vector A 96 end 97 98The following functions implement the maps for bool vectors 99 100fun bitvector_to_bitlist bv = 101 let open boolSyntax 102 in Vector.foldr(fn (b,list) => 103 listSyntax.mk_cons(if b then T else F,list)) 104 (listSyntax.mk_nil Type.bool) bv 105 end; 106 107fun bitlist_to_bitvector tm = 108 let val (tmlist,ty) = listSyntax.dest_list tm 109 val blist = List.map (equal boolSyntax.T) tmlist 110 in Vector.fromList blist 111 end 112 113The following functions implement the maps for bool vectors 114 115fun bitvector_to_bitlist bv = 116 let open boolSyntax 117 fun bit ch = (ch = #"1") 118 fun bin i = IntInf.fmt StringCvt.BIN i 119 val ml_bitlist = map bit (explode (bin bv)) 120 in 121 List.foldr(fn (b,list) => 122 listSyntax.mk_cons(if b then T else F,list)) 123 (listSyntax.mk_nil Type.bool) ml_bitlist 124 end; 125 126fun bitlist_to_bitvector tm = 127 let val (tmlist,ty) = listSyntax.dest_list tm 128 fun binchar tm = 129 if tm = T then #"1" else 130 if tm = F then #"0" 131 else raise ERR "bitlist_to_bitvector" "expected boolean literal" 132 val blist = List.map binchar tmlist 133 fun list_reader (h::t) = SOME(h,t) 134 | list_reader [] = NONE 135 in 136 case IntInf.scan StringCvt.BIN list_reader blist 137 of SOME (i,[]) => i 138 | otherwise => raise ERR "bitlist_to_bitvector" "unexpected input" 139 end 140*) 141 142val charset_tm = prim_mk_const{Thy="charset",Name="Charset"} 143 144(*---------------------------------------------------------------------------*) 145(* Lift and drop charsets between ML and HOL *) 146(*---------------------------------------------------------------------------*) 147(* 148val (chop4,join4) = 149 let open IntInf 150 val exp2_64 = pow(2,64) 151 val exp2_128 = pow(2,128) 152 val exp2_192 = pow(2,192) 153 val exp2_256 = pow(2,256) 154 fun chop4 i = 155 let val (a,b) = divMod(i,exp2_64) 156 val (c,d) = divMod(a,exp2_64) 157 val (e,f) = divMod(c,exp2_64) 158 val (g,h) = divMod(e,exp2_64) 159 in (h,f,d,b) 160 end 161 fun join4 (h,f,d,b) = b + d*exp2_64 + f*exp2_128 + h*exp2_192 162 in 163 (chop4,join4) 164 end; 165 166 let val num = IntInf.toInt 167 in fn cset => 168 let open wordsSyntax 169 val (h,f,d,b) = chop4 cset 170 val htm = mk_wordii(num h,64) 171 val ftm = mk_wordii(num f,64) 172 val dtm = mk_wordii(num d,64) 173 val btm = mk_wordii(num b,64) 174 in list_mk_comb(charset_tm,[htm,ftm,dtm,btm]) 175 end 176 end; 177 178fun term_to_charset tm = (* ``:charset`` -> IntInf.int *) 179 case strip_comb tm 180 of (const,[htm,ftm,dtm,btm]) => 181 if same_const const charset_tm 182 then let open wordsSyntax 183 val inf = IntInf.fromInt 184 val h = inf (uint_of_word htm) 185 val f = inf (uint_of_word ftm) 186 val d = inf (uint_of_word dtm) 187 val b = inf (uint_of_word btm) 188 in join4(h,f,d,b) 189 end 190 else raise ERR "term_to_charset" "expected Charset _ _ _ _" 191 | other => raise ERR "term_to_charset" "expected Charset _ _ _ _" 192*) 193 194val charset_to_term = (* w64*w64*w64*w64 -> ``:charset`` *) 195 let val num = Arbnum.fromLargeInt o Word64.toLargeInt 196 in fn (v1,v2,v3,v4) => 197 let open wordsSyntax 198 val v1tm = mk_wordi(num v1,64) 199 val v2tm = mk_wordi(num v2,64) 200 val v3tm = mk_wordi(num v3,64) 201 val v4tm = mk_wordi(num v4,64) 202 in list_mk_comb(charset_tm,[v1tm,v2tm,v3tm,v4tm]) 203 end 204 end; 205 206fun term_to_charset tm = (* ``:charset`` -> w64*w64*w64*w64 *) 207 case strip_comb tm 208 of (const,[v1tm,v2tm,v3tm,v4tm]) => 209 if same_const const charset_tm 210 then let open wordsSyntax 211 val inf = Word64.fromLargeInt o Arbnum.toLargeInt 212 val v1 = inf (dest_word_literal v1tm) 213 val v2 = inf (dest_word_literal v2tm) 214 val v3 = inf (dest_word_literal v3tm) 215 val v4 = inf (dest_word_literal v4tm) 216 in (v1,v2,v3,v4) 217 end 218 else raise ERR "term_to_charset" "expected Charset _ _ _ _" 219 | other => raise ERR "term_to_charset" "expected Charset _ _ _ _" 220 221(*---------------------------------------------------------------------------*) 222(* Build a regexp term from an ML regexp expression *) 223(*---------------------------------------------------------------------------*) 224 225fun mk_regexp r = 226 case r 227 of Chset bv => mk_chset(charset_to_term bv) 228 | Cat (r1,r2) => mk_cat(mk_regexp r1,mk_regexp r2) 229 | Star r => mk_star (mk_regexp r) 230 | Or rlist => mk_or (List.map mk_regexp rlist) 231 | Neg r => mk_neg (mk_regexp r); 232 233fun dest_regexp tm = 234 (case total dest_chset tm 235 of SOME w => Chset (term_to_charset w) 236 | NONE => 237 case total dest_cat tm 238 of SOME (t1,t2) => Cat(dest_regexp t1,dest_regexp t2) 239 | NONE => 240 case total dest_star tm 241 of SOME t => Star (dest_regexp t) 242 | NONE => 243 case total dest_or tm 244 of SOME tlist => Or (List.map dest_regexp tlist) 245 | NONE => 246 case total dest_neg tm 247 of SOME t => Neg (dest_regexp t) 248 | NONE => raise ERR "dest_regexp" "not a ground regexp term") 249 handle e => raise wrap_exn "regexpSyntax" "dest_regexp" e; 250 251(*---------------------------------------------------------------------------*) 252(* Derived syntax *) 253(*---------------------------------------------------------------------------*) 254 255fun mkc s = prim_mk_const {Thy = "charset", Name = s}; 256 257val charset_empty_tm = mkc "charset_empty"; 258val charset_full_tm = mkc "charset_full"; 259val empty_tm = mk_chset charset_empty_tm; 260val sigma_tm = mk_chset charset_full_tm; 261 262val epsilon_tm = mk_star empty_tm; 263val sigmastar_tm = mk_star sigma_tm; 264 265end 266