1structure regexpSyntax :> regexpSyntax =
2struct
3
4open Feedback Lib HolKernel boolLib
5     Regexp_Type regexpTheory regexp_compilerTheory;
6
7val ERR = mk_HOL_ERR "regexpSyntax";
8
9val charset_ty = ``:charset``;
10
11val regexp_ty = mk_thy_type{Thy="regexp",Tyop="regexp",Args=[]};
12
13(*---------------------------------------------------------------------------*)
14(* regexp constructors                                                       *)
15(*---------------------------------------------------------------------------*)
16
17fun mkc s = prim_mk_const {Thy = "regexp", Name = s};
18
19val chset_tm = mkc "Chset"
20val cat_tm   = mkc "Cat"
21val star_tm  = mkc "Star"
22val or_tm    = mkc "Or"
23val neg_tm   = mkc "Neg"
24val and_tm   = mkc "And"
25val regexp_lang_tm = mkc "regexp_lang"
26val regexp_matcher_tm =
27  prim_mk_const{Thy="regexp_compiler", Name = "regexp_matcher"};
28
29
30val mk_chset    = mk_monop chset_tm;
31val mk_cat      = mk_binop cat_tm
32val mk_star     = mk_monop star_tm
33val mk_neg      = mk_monop neg_tm
34fun mk_or tlist = mk_comb(or_tm,listSyntax.mk_list(tlist,regexp_ty));
35val mk_and      = mk_binop and_tm
36val mk_regexp_lang = mk_monop regexp_lang_tm
37val mk_regexp_matcher = mk_binop regexp_matcher_tm
38
39val dest_chset = dest_monop chset_tm (ERR "dest_chset" "expected a Chset");
40val dest_cat   = dest_binop cat_tm   (ERR "dest_cat" "expected a Cat");
41val dest_star  = dest_monop star_tm  (ERR "dest_star" "expected a Star");
42val dest_neg   = dest_monop neg_tm   (ERR "dest_neg" "expected a Neg");
43val dest_and   = dest_binop and_tm   (ERR "dest_and" "expected an And");
44val dest_regexp_lang =
45    dest_monop regexp_lang_tm
46        (ERR "dest_regexp_lang" "expected regexp_lang application");
47val dest_regexp_matcher =
48    dest_binop regexp_matcher_tm
49        (ERR "dest_regexp_matcher" "expected an instance of regexp_matcher");
50
51fun dest_or tm =
52 let val (tlist,ty) = listSyntax.dest_list
53                        (dest_monop or_tm (ERR "dest_or" "expected an Or") tm)
54 in if ty = regexp_ty
55    then tlist
56    else raise ERR "dest_or" "expected Or of list of regexps"
57 end;
58
59(*
60val vector_tm = prim_mk_const{Thy="ml_translator",Name="Vector"};
61val vector_tm = prim_mk_const{Thy="charset",Name="Vector"}
62*)
63
64val vector_tm = prim_mk_const{Thy="regexp_compiler",Name="Vector"}
65
66val mk_vector = mk_monop vector_tm;
67val dest_vector = dest_monop vector_tm (ERR "dest_vector" "expected a Vector");
68val is_vector = Lib.can dest_vector;
69
70fun list_mk_vector (tlist,ty) = mk_vector(listSyntax.mk_list(tlist,ty))
71
72fun strip_vector tm =
73  listSyntax.dest_list(dest_vector tm)
74  handle HOL_ERR _ => raise ERR "strip_vector" "";
75
76val is_chset = Lib.can dest_chset
77val is_cat   = Lib.can dest_cat
78val is_star  = Lib.can dest_star
79val is_neg   = Lib.can dest_neg
80val is_or    = Lib.can dest_or
81val is_and   = Lib.can dest_and
82val is_regexp_lang = Lib.can dest_regexp_lang
83val is_regexp_matcher = Lib.can dest_regexp_matcher;
84
85
86(*---------------------------------------------------------------------------*)
87(* Maps between ML bitvectors and HOL terms                                  *)
88(* The following commented out functions implement the maps for n-bit words  *)
89(*
90val bitvector_to_word =
91 let val num_of = Vector.foldr(fn (b,n) => if b then  2 * n + 1 else 2 * n) 0
92 in
93   fn bv => wordsSyntax.mk_wordii(num_of bv,Regexp_Type.alphabet_size)
94 end;
95
96fun word_to_bitvector tm =
97 let val n = wordsSyntax.dest_word_literal tm
98     val A = Array.tabulate(Regexp_Type.alphabet_size, K false)
99     open Arbnum Array
100     val _ = foldli (fn (i,e,n) =>
101              let val (d,m) = divmod(n,two) in (update(A,i,m=one); d) end) n A
102 in vector A
103 end
104
105The following functions implement the maps for bool vectors
106
107fun bitvector_to_bitlist bv =
108 let open boolSyntax
109 in Vector.foldr(fn (b,list) =>
110        listSyntax.mk_cons(if b then T else F,list))
111       (listSyntax.mk_nil Type.bool) bv
112 end;
113
114fun bitlist_to_bitvector tm =
115 let val (tmlist,ty) = listSyntax.dest_list tm
116     val blist = List.map (equal boolSyntax.T) tmlist
117 in Vector.fromList blist
118 end
119
120The following functions implement the maps for bool vectors
121
122fun bitvector_to_bitlist bv =
123 let open boolSyntax
124     fun bit ch = (ch = #"1")
125     fun bin i = IntInf.fmt StringCvt.BIN i
126     val ml_bitlist = map bit (explode (bin bv))
127 in
128    List.foldr(fn (b,list) =>
129        listSyntax.mk_cons(if b then T else F,list))
130       (listSyntax.mk_nil Type.bool) ml_bitlist
131 end;
132
133fun bitlist_to_bitvector tm =
134 let val (tmlist,ty) = listSyntax.dest_list tm
135     fun binchar tm =
136       if tm = T then #"1" else
137       if tm = F then #"0"
138       else raise ERR "bitlist_to_bitvector" "expected boolean literal"
139     val blist = List.map binchar tmlist
140     fun list_reader (h::t) = SOME(h,t)
141       | list_reader [] = NONE
142 in
143   case IntInf.scan StringCvt.BIN list_reader blist
144    of SOME (i,[]) => i
145     | otherwise => raise ERR "bitlist_to_bitvector" "unexpected input"
146 end
147*)
148
149val charset_tm = prim_mk_const{Thy="charset",Name="Charset"}
150
151(*---------------------------------------------------------------------------*)
152(* Lift and drop charsets between ML and HOL                                 *)
153(*---------------------------------------------------------------------------*)
154(*
155val (chop4,join4) =
156 let open IntInf
157     val exp2_64 = pow(2,64)
158     val exp2_128 = pow(2,128)
159     val exp2_192 = pow(2,192)
160     val exp2_256 = pow(2,256)
161     fun chop4 i =
162      let val (a,b) = divMod(i,exp2_64)
163          val (c,d) = divMod(a,exp2_64)
164          val (e,f) = divMod(c,exp2_64)
165          val (g,h) = divMod(e,exp2_64)
166      in (h,f,d,b)
167      end
168     fun join4 (h,f,d,b) = b + d*exp2_64 + f*exp2_128 + h*exp2_192
169 in
170  (chop4,join4)
171 end;
172
173 let val num = IntInf.toInt
174 in fn cset =>
175    let open wordsSyntax
176        val (h,f,d,b) = chop4 cset
177        val htm = mk_wordii(num h,64)
178        val ftm = mk_wordii(num f,64)
179        val dtm = mk_wordii(num d,64)
180        val btm = mk_wordii(num b,64)
181    in list_mk_comb(charset_tm,[htm,ftm,dtm,btm])
182    end
183 end;
184
185fun term_to_charset tm = (* ``:charset`` -> IntInf.int *)
186 case strip_comb tm
187  of (const,[htm,ftm,dtm,btm]) =>
188      if same_const const charset_tm
189        then let open wordsSyntax
190                 val inf = IntInf.fromInt
191                 val h = inf (uint_of_word htm)
192                 val f = inf (uint_of_word ftm)
193                 val d = inf (uint_of_word dtm)
194                 val b = inf (uint_of_word btm)
195             in join4(h,f,d,b)
196             end
197        else raise ERR "term_to_charset" "expected Charset _ _ _ _"
198   | other => raise ERR "term_to_charset" "expected Charset _ _ _ _"
199*)
200
201val charset_to_term = (* w64*w64*w64*w64 -> ``:charset`` *)
202 let val num = Arbnum.fromLargeInt o Word64.toLargeInt
203 in fn (v1,v2,v3,v4) =>
204    let open wordsSyntax
205        val v1tm = mk_wordi(num v1,64)
206        val v2tm = mk_wordi(num v2,64)
207        val v3tm = mk_wordi(num v3,64)
208        val v4tm = mk_wordi(num v4,64)
209    in list_mk_comb(charset_tm,[v1tm,v2tm,v3tm,v4tm])
210    end
211 end;
212
213fun term_to_charset tm = (* ``:charset`` -> w64*w64*w64*w64 *)
214 case strip_comb tm
215  of (const,[v1tm,v2tm,v3tm,v4tm]) =>
216      if same_const const charset_tm
217        then let open wordsSyntax
218                 val inf = Word64.fromLargeInt o Arbnum.toLargeInt
219                 val v1 = inf (dest_word_literal v1tm)
220                 val v2 = inf (dest_word_literal v2tm)
221                 val v3 = inf (dest_word_literal v3tm)
222                 val v4 = inf (dest_word_literal v4tm)
223             in (v1,v2,v3,v4)
224             end
225        else raise ERR "term_to_charset" "expected Charset _ _ _ _"
226   | other => raise ERR "term_to_charset" "expected Charset _ _ _ _"
227
228(*---------------------------------------------------------------------------*)
229(* Build a regexp term from an ML regexp expression                          *)
230(*---------------------------------------------------------------------------*)
231
232fun regexp_to_term r =
233 case r
234  of Chset bv    => mk_chset(charset_to_term bv)
235   | Cat (r1,r2) => mk_cat(regexp_to_term r1,regexp_to_term r2)
236   | Star r      => mk_star (regexp_to_term r)
237   | Or rlist    => mk_or (List.map regexp_to_term rlist)
238   | Neg r       => mk_neg (regexp_to_term r);
239
240fun term_to_regexp tm =
241 (case total dest_chset tm
242  of SOME w => Chset (term_to_charset w)
243   | NONE =>
244 case total dest_cat tm
245  of SOME (t1,t2) => Cat(term_to_regexp t1,term_to_regexp t2)
246   | NONE =>
247 case total dest_star tm
248  of SOME t => Star (term_to_regexp t)
249   | NONE =>
250 case total dest_or tm
251  of SOME tlist => Or (List.map term_to_regexp tlist)
252   | NONE =>
253 case total dest_neg tm
254  of SOME t => Neg (term_to_regexp t)
255   | NONE => raise ERR "term_to_regexp" "not a ground regexp term")
256 handle e => raise wrap_exn "regexpSyntax" "term_to_regexp" e;
257
258(*---------------------------------------------------------------------------*)
259(* Derived syntax                                                            *)
260(*---------------------------------------------------------------------------*)
261
262fun mkc s = prim_mk_const {Thy = "charset", Name = s};
263
264val charset_empty_tm = mkc "charset_empty";
265val charset_full_tm = mkc "charset_full";
266val empty_tm = mk_chset charset_empty_tm;
267val dot_tm = mk_chset charset_full_tm;
268
269val epsilon_tm = mk_star empty_tm;
270val dot_star_tm = mk_star dot_tm;
271
272end
273