1structure regexpSyntax :> regexpSyntax =
2struct
3
4open Feedback Lib HolKernel boolLib
5     Regexp_Type regexpTheory regexp_compilerTheory;
6
7val ERR = mk_HOL_ERR "regexpSyntax";
8
9val charset_ty = ``:charset``;
10
11val regexp_ty = mk_thy_type{Thy="regexp",Tyop="regexp",Args=[]};
12
13(*---------------------------------------------------------------------------*)
14(* regexp constructors                                                       *)
15(*---------------------------------------------------------------------------*)
16
17fun mkc s = prim_mk_const {Thy = "regexp", Name = s};
18
19val chset_tm = mkc "Chset"
20val cat_tm   = mkc "Cat"
21val star_tm  = mkc "Star"
22val or_tm    = mkc "Or"
23val neg_tm   = mkc "Neg"
24val and_tm   = mkc "And"
25val regexp_matcher_tm =
26  prim_mk_const{Thy="regexp_compiler", Name = "regexp_matcher"};
27
28
29val mk_chset    = mk_monop chset_tm;
30val mk_cat      = mk_binop cat_tm
31val mk_star     = mk_monop star_tm
32val mk_neg      = mk_monop neg_tm
33fun mk_or tlist = mk_comb(or_tm,listSyntax.mk_list(tlist,regexp_ty));
34val mk_and      = mk_binop and_tm
35val mk_regexp_matcher = mk_binop regexp_matcher_tm
36
37val dest_chset = dest_monop chset_tm (ERR "dest_chset" "expected a Chset");
38val dest_cat   = dest_binop cat_tm   (ERR "dest_cat" "expected a Cat");
39val dest_star  = dest_monop star_tm  (ERR "dest_star" "expected a Star");
40val dest_neg   = dest_monop neg_tm   (ERR "dest_neg" "expected a Neg");
41val dest_and   = dest_binop and_tm   (ERR "dest_and" "expected an And");
42val dest_regexp_matcher = dest_binop regexp_matcher_tm
43        (ERR "dest_regexp_matcher" "expected an instance of regexp_matcher");
44
45fun dest_or tm =
46 let val (tlist,ty) = listSyntax.dest_list
47                        (dest_monop or_tm (ERR "dest_or" "expected an Or") tm)
48 in if ty = regexp_ty
49    then tlist
50    else raise ERR "dest_or" "expected Or of list of regexps"
51 end;
52
53(*
54val vector_tm = prim_mk_const{Thy="ml_translator",Name="Vector"};
55val vector_tm = prim_mk_const{Thy="charset",Name="Vector"}
56*)
57
58val vector_tm = prim_mk_const{Thy="regexp_compiler",Name="Vector"}
59
60val mk_vector = mk_monop vector_tm;
61val dest_vector = dest_monop vector_tm (ERR "dest_vector" "expected a Vector");
62val is_vector = Lib.can dest_vector;
63
64fun list_mk_vector (tlist,ty) = mk_vector(listSyntax.mk_list(tlist,ty))
65
66fun strip_vector tm =
67  listSyntax.dest_list(dest_vector tm)
68  handle HOL_ERR _ => raise ERR "strip_vector" "";
69
70val is_chset = Lib.can dest_chset
71val is_cat   = Lib.can dest_cat
72val is_star  = Lib.can dest_star
73val is_neg   = Lib.can dest_neg
74val is_or    = Lib.can dest_or
75val is_and   = Lib.can dest_and
76val is_regexp_matcher = Lib.can dest_regexp_matcher;
77
78
79(*---------------------------------------------------------------------------*)
80(* Maps between ML bitvectors and HOL terms                                  *)
81(* The following commented out functions implement the maps for n-bit words  *)
82(*
83val bitvector_to_word =
84 let val num_of = Vector.foldr(fn (b,n) => if b then  2 * n + 1 else 2 * n) 0
85 in
86   fn bv => wordsSyntax.mk_wordii(num_of bv,Regexp_Type.alphabet_size)
87 end;
88
89fun word_to_bitvector tm =
90 let val n = wordsSyntax.dest_word_literal tm
91     val A = Array.tabulate(Regexp_Type.alphabet_size, K false)
92     open Arbnum Array
93     val _ = foldli (fn (i,e,n) =>
94              let val (d,m) = divmod(n,two) in (update(A,i,m=one); d) end) n A
95 in vector A
96 end
97
98The following functions implement the maps for bool vectors
99
100fun bitvector_to_bitlist bv =
101 let open boolSyntax
102 in Vector.foldr(fn (b,list) =>
103        listSyntax.mk_cons(if b then T else F,list))
104       (listSyntax.mk_nil Type.bool) bv
105 end;
106
107fun bitlist_to_bitvector tm =
108 let val (tmlist,ty) = listSyntax.dest_list tm
109     val blist = List.map (equal boolSyntax.T) tmlist
110 in Vector.fromList blist
111 end
112
113The following functions implement the maps for bool vectors
114
115fun bitvector_to_bitlist bv =
116 let open boolSyntax
117     fun bit ch = (ch = #"1")
118     fun bin i = IntInf.fmt StringCvt.BIN i
119     val ml_bitlist = map bit (explode (bin bv))
120 in
121    List.foldr(fn (b,list) =>
122        listSyntax.mk_cons(if b then T else F,list))
123       (listSyntax.mk_nil Type.bool) ml_bitlist
124 end;
125
126fun bitlist_to_bitvector tm =
127 let val (tmlist,ty) = listSyntax.dest_list tm
128     fun binchar tm =
129       if tm = T then #"1" else
130       if tm = F then #"0"
131       else raise ERR "bitlist_to_bitvector" "expected boolean literal"
132     val blist = List.map binchar tmlist
133     fun list_reader (h::t) = SOME(h,t)
134       | list_reader [] = NONE
135 in
136   case IntInf.scan StringCvt.BIN list_reader blist
137    of SOME (i,[]) => i
138     | otherwise => raise ERR "bitlist_to_bitvector" "unexpected input"
139 end
140*)
141
142val charset_tm = prim_mk_const{Thy="charset",Name="Charset"}
143
144(*---------------------------------------------------------------------------*)
145(* Lift and drop charsets between ML and HOL                                 *)
146(*---------------------------------------------------------------------------*)
147(*
148val (chop4,join4) =
149 let open IntInf
150     val exp2_64 = pow(2,64)
151     val exp2_128 = pow(2,128)
152     val exp2_192 = pow(2,192)
153     val exp2_256 = pow(2,256)
154     fun chop4 i =
155      let val (a,b) = divMod(i,exp2_64)
156          val (c,d) = divMod(a,exp2_64)
157          val (e,f) = divMod(c,exp2_64)
158          val (g,h) = divMod(e,exp2_64)
159      in (h,f,d,b)
160      end
161     fun join4 (h,f,d,b) = b + d*exp2_64 + f*exp2_128 + h*exp2_192
162 in
163  (chop4,join4)
164 end;
165
166 let val num = IntInf.toInt
167 in fn cset =>
168    let open wordsSyntax
169        val (h,f,d,b) = chop4 cset
170        val htm = mk_wordii(num h,64)
171        val ftm = mk_wordii(num f,64)
172        val dtm = mk_wordii(num d,64)
173        val btm = mk_wordii(num b,64)
174    in list_mk_comb(charset_tm,[htm,ftm,dtm,btm])
175    end
176 end;
177
178fun term_to_charset tm = (* ``:charset`` -> IntInf.int *)
179 case strip_comb tm
180  of (const,[htm,ftm,dtm,btm]) =>
181      if same_const const charset_tm
182        then let open wordsSyntax
183                 val inf = IntInf.fromInt
184                 val h = inf (uint_of_word htm)
185                 val f = inf (uint_of_word ftm)
186                 val d = inf (uint_of_word dtm)
187                 val b = inf (uint_of_word btm)
188             in join4(h,f,d,b)
189             end
190        else raise ERR "term_to_charset" "expected Charset _ _ _ _"
191   | other => raise ERR "term_to_charset" "expected Charset _ _ _ _"
192*)
193
194val charset_to_term = (* w64*w64*w64*w64 -> ``:charset`` *)
195 let val num = Arbnum.fromLargeInt o Word64.toLargeInt
196 in fn (v1,v2,v3,v4) =>
197    let open wordsSyntax
198        val v1tm = mk_wordi(num v1,64)
199        val v2tm = mk_wordi(num v2,64)
200        val v3tm = mk_wordi(num v3,64)
201        val v4tm = mk_wordi(num v4,64)
202    in list_mk_comb(charset_tm,[v1tm,v2tm,v3tm,v4tm])
203    end
204 end;
205
206fun term_to_charset tm = (* ``:charset`` -> w64*w64*w64*w64 *)
207 case strip_comb tm
208  of (const,[v1tm,v2tm,v3tm,v4tm]) =>
209      if same_const const charset_tm
210        then let open wordsSyntax
211                 val inf = Word64.fromLargeInt o Arbnum.toLargeInt
212                 val v1 = inf (dest_word_literal v1tm)
213                 val v2 = inf (dest_word_literal v2tm)
214                 val v3 = inf (dest_word_literal v3tm)
215                 val v4 = inf (dest_word_literal v4tm)
216             in (v1,v2,v3,v4)
217             end
218        else raise ERR "term_to_charset" "expected Charset _ _ _ _"
219   | other => raise ERR "term_to_charset" "expected Charset _ _ _ _"
220
221(*---------------------------------------------------------------------------*)
222(* Build a regexp term from an ML regexp expression                          *)
223(*---------------------------------------------------------------------------*)
224
225fun mk_regexp r =
226 case r
227  of Chset bv    => mk_chset(charset_to_term bv)
228   | Cat (r1,r2) => mk_cat(mk_regexp r1,mk_regexp r2)
229   | Star r      => mk_star (mk_regexp r)
230   | Or rlist    => mk_or (List.map mk_regexp rlist)
231   | Neg r       => mk_neg (mk_regexp r);
232
233fun dest_regexp tm =
234 (case total dest_chset tm
235  of SOME w => Chset (term_to_charset w)
236   | NONE =>
237 case total dest_cat tm
238  of SOME (t1,t2) => Cat(dest_regexp t1,dest_regexp t2)
239   | NONE =>
240 case total dest_star tm
241  of SOME t => Star (dest_regexp t)
242   | NONE =>
243 case total dest_or tm
244  of SOME tlist => Or (List.map dest_regexp tlist)
245   | NONE =>
246 case total dest_neg tm
247  of SOME t => Neg (dest_regexp t)
248   | NONE => raise ERR "dest_regexp" "not a ground regexp term")
249 handle e => raise wrap_exn "regexpSyntax" "dest_regexp" e;
250
251(*---------------------------------------------------------------------------*)
252(* Derived syntax                                                            *)
253(*---------------------------------------------------------------------------*)
254
255fun mkc s = prim_mk_const {Thy = "charset", Name = s};
256
257val charset_empty_tm = mkc "charset_empty";
258val charset_full_tm = mkc "charset_full";
259val empty_tm = mk_chset charset_empty_tm;
260val sigma_tm = mk_chset charset_full_tm;
261
262val epsilon_tm = mk_star empty_tm;
263val sigmastar_tm = mk_star sigma_tm;
264
265end
266