1open Lib regexpMisc regexpLib;
2
3val justifyDefault = regexpLib.SML;
4
5val ERR = Feedback.mk_HOL_ERR  "regexp2dfa";
6
7fun failwithERR e =
8  (stdErr_print (Feedback.exn_to_string e);
9   regexpMisc.fail());
10
11fun dest_string "" = raise ERR "dest_string" "empty string"
12  | dest_string str =
13     let val c = String.sub(str,0)
14         val t = String.substring(str,1,String.size str - 1)
15     in (c,t)
16     end
17
18fun Upper str =
19 let open Char
20     val (c,t) = dest_string str
21 in if isUpper c then str
22    else String.concat [toString(toUpper c), t]
23 end;
24
25fun check_compset() =
26 let fun join (s1,s2) = s2^"."^s1
27 in case computeLib.unmapped (regexpLib.regexp_compset())
28     of [] => ()
29      | check_these =>
30         (stdErr_print "Unmapped consts in regexp_compset: \n  ";
31          stdErr_print (String.concat
32             (spreadlnWith {sep=", ", ln = "\n  ", width = 5}
33                           join check_these));
34          stdErr_print "\n\n")
35 end
36
37
38(*---------------------------------------------------------------------------*)
39(* Ada code                                                                  *)
40(*---------------------------------------------------------------------------*)
41
42local
43 fun finalsString nstates list =
44 let val spreadList =
45      spreadlnWith {sep=",", ln="\n     ", width=10} Bool.toString list
46 in
47  String.concat
48   ["ACCEPTING : constant array (0 .. ", Int.toString (nstates-1), ") of Boolean :=",
49    "\n    (", String.concat spreadList, ");"
50   ]
51 end;
52
53fun array256String intList =
54 let val spreadList =
55      spreadlnWith {sep=",", ln="\n     ", width=31} Int.toString intList
56 in
57   String.concat ("(":: spreadList @ [")"])
58 end
59
60fun twoDarrayString nstates intLists =
61  let val arrays = map array256String intLists
62  in String.concat
63      ["TABLE : constant array (0 .. ",
64       Int.toString (nstates-1), ", 0 .. 255) of Integer :=",
65       "\n    (",  String.concat (spread ",\n    " arrays), ");"
66      ]
67  end;
68in
69fun Adafile name quote (_,_,finals,table) =
70 let val _ = stdErr_print "Generating code.\n"
71     val nstates = List.length finals
72     val Uname = Upper name handle e => failwithERR e
73 in String.concat
74 ["procedure ", Uname, "\nis\n\n",
75  "-----------------------------------------------------------------------------\n",
76  "-- The following DFA ", name, " is the compiled form of regexp\n",
77  "--\n",
78  "--   ", quote, "\n",
79  "--\n",
80  "-- Number of states in DFA: ",Int.toString nstates, "\n",
81  "--\n",
82  "-------------------------------------------------------------------------------\n",
83  "\n",
84  finalsString nstates finals, "\n\n",
85  twoDarrayString nstates table, "\n\n",
86  " function Matcher (S : in String) return Boolean\n",
87  " is\n",
88  "   State : Integer := 0;\n",
89  " begin\n",
90  "   for I in S'first .. S'last\n",
91  "    loop\n",
92  "      State := TABLE(State, Character'Pos(S(I)));\n",
93  "    end loop;\n",
94  "   return ACCEPTING(State);\n",
95  " end Matcher;\n\n",
96  "begin\n\n",
97  "  null;  -- compiler wants a statement here, so here's a trivial one\n\n",
98  "end ", Uname, ";\n"
99  ]
100 end
101end;
102
103(*---------------------------------------------------------------------------*)
104(* C code                                                                    *)
105(*---------------------------------------------------------------------------*)
106
107local
108fun bool_to_C true = 1
109  | bool_to_C false = 0;
110
111fun finalsString list =
112 let val spreadList =
113        spreadlnWith {sep=",", ln="\n  ", width=31} Int.toString list
114 in
115   String.concat ("{" :: spreadList @ ["}"])
116 end;
117
118fun array256String intList =
119 let val spreadList =
120      spreadlnWith {sep=",", ln="\n   ", width=31} Int.toString intList
121 in
122   String.concat ("{":: spreadList @ ["}"])
123 end
124
125fun twoDarrayString intLists =
126  let val _ = stdErr_print "Generating code.\n"
127      val arrays = map array256String intLists
128  in String.concat
129      (("{"::spread ",\n " arrays) @ ["};"])
130  end;
131in
132fun Cfile name quote (_,_,finals,table) =
133 let val nstates = List.length finals
134     val finals = map bool_to_C finals
135 in String.concat
136 ["/*---------------------------------------------------------------------------\n",
137  " * DFA ", name, " is the compiled form of regexp\n",
138  " *\n",
139  " *    ", quote, "\n",
140  " * \n",
141  " * Number of states in DFA: ",Int.toString nstates, "\n",
142  " *\n",
143  " *---------------------------------------------------------------------------*/\n",
144  "\n",
145  "int ACCEPTING_",name," [", Int.toString nstates,"] =\n ",finalsString finals, ";\n",
146  "\n",
147  "unsigned long DELTA_",name," [",Int.toString nstates,"] [256] = \n ",
148  twoDarrayString table,
149  "\n\n",
150  "int match_",name,"(unsigned char *s, int len) {\n",
151  "  int state, i;\n",
152  "\n",
153  "  state = 0;\n",
154  "\n",
155  "  for (i = 0; i < len; i++) { \n",
156  "    state = DELTA_",name,"[state] [s[i]];\n",
157  "   }\n",
158  "\n",
159  "  return ACCEPTING_",name,"[state];\n",
160  "}\n"
161 ]
162 end
163end;
164
165(*---------------------------------------------------------------------------*)
166(* ML code                                                                   *)
167(*---------------------------------------------------------------------------*)
168
169local
170 fun finalsString list =
171 let val spreadList =
172        spreadlnWith {sep=",", ln="\n     ", width=10} Bool.toString list
173 in
174   String.concat ("Vector.fromList\n    [" :: spreadList @ ["]"])
175 end;
176
177fun array256String intList =
178 let val spreadList =
179       spreadlnWith {sep=",", ln="\n     ", width=31} Int.toString intList
180 in
181   String.concat ("[":: spreadList @ ["]"])
182 end
183
184fun twoDarrayString intLists =
185  let val arrays = map array256String intLists
186  in String.concat
187      ((" Vector.fromList (List.map Vector.fromList\n    ["
188         ::spread ",\n    " arrays) @ ["])"])
189  end;
190in
191fun MLfile name quote (_,_,finals,table) =
192 let val _ = stdErr_print "Generating code.\n"
193     val nstates = List.length finals
194 in String.concat
195 ["(*---------------------------------------------------------------------------\n",
196  " * DFA ", name, " is the compiled form of regexp\n",
197  " *\n",
198  " *   ", quote, "\n",
199  " *\n",
200  " * Number of states in DFA: ",Int.toString nstates, "\n",
201  " *\n",
202  " *---------------------------------------------------------------------------*)\n",
203  "\n",
204  "val ", name, " = let\n",
205  " val FINALS =\n  ",finalsString finals, "\n ",
206  " val DELTA =\n  ", twoDarrayString table, "\n ",
207  "  fun step state char = Vector.sub(Vector.sub(DELTA,state),Char.ord char)\n",
208  " in\n",
209  "   fn s =>\n",
210  "    let val len = String.size s\n",
211  "        fun steps state i =\n",
212  "          if i = len then state\n",
213  "          else steps (step state (String.sub(s,i))) (i+1)\n",
214  "    in\n",
215  "      Vector.sub(FINALS,steps 0 0)\n",
216  "    end\n",
217  " end;\n"
218  ]
219 end
220end;
221
222
223(*---------------------------------------------------------------------------*)
224(* Java code                                                                 *)
225(*---------------------------------------------------------------------------*)
226
227local
228fun finalsString list =
229 let val spreadList =
230       spreadlnWith {sep=",", ln="\n    ", width=10} Bool.toString list
231 in
232   String.concat ("{" :: spreadList @ ["}"])
233 end;
234
235fun array256String intList =
236 let val spreadList =
237       spreadlnWith {sep=",", ln="\n     ", width=31} Int.toString intList
238 in
239   String.concat ("{":: spreadList @ ["}"])
240 end
241
242fun twoDarrayString intLists =
243  let val arrays = map array256String intLists
244  in String.concat
245      (("   {"::spread ",\n    " arrays) @ ["};"])
246  end;
247in
248fun Javafile name quote (_,_,finals,table) =
249 let val _ = stdErr_print "Generating code.\n"
250     val nstates = List.length finals
251     val Uname = Upper name handle e => failwithERR e
252 in String.concat
253 ["/*---------------------------------------------------------------------------\n",
254  " -- DFA ", name, " is the compiled form of regexp\n",
255  " --\n",
256  " --   ", quote, "\n",
257  " --\n",
258  " -- Number of states in DFA: ",Int.toString nstates, "\n",
259  " --\n",
260  " *---------------------------------------------------------------------------*/\n",
261  "\n",
262  "public class ", Uname, " {\n",
263  "  boolean ACCEPTING_",name," [] =\n   ",finalsString finals, ";\n",
264  "\n",
265  "  int DELTA_",name,"[][] = \n",
266  twoDarrayString table,
267  "\n\n",
268  "  boolean match_",name,"(String s) {\n",
269  "    int state = 0;\n",
270  "\n",
271  "    for (int i = 0; i < s.length(); i++) { \n",
272  "      state = DELTA_",name,"[state][s.charAt(i)];\n",
273  "     }\n",
274  "\n",
275  "    return ACCEPTING_",name,"[state];\n",
276  "  }\n",
277  "}\n"
278 ]
279 end
280end;
281
282fun HOLfile name quote (certificate,_,finals,table) =
283 case certificate
284  of NONE => ""
285   | SOME thm =>
286     let open HolKernel Drule boolLib bossLib
287         val _ = stdErr_print "Generating theorem.\n"
288         val eqn = snd(dest_forall(concl thm))
289         val (exec_dfa,[finals,table,start,s]) = strip_comb(lhs eqn)
290         val name_finals = name^"_finals"
291         val name_table = name^"_table"
292         val name_start = name^"_start"
293         val finals_var = mk_var(name_finals,type_of finals)
294         val table_var  = mk_var(name_table,type_of table)
295         val start_var  = mk_var(name_start,type_of start)
296         val finals_def = new_definition(name_finals,mk_eq(finals_var,finals))
297         val table_def  = new_definition(name_table,mk_eq(table_var,table))
298         val start_def  = new_definition(name_start,mk_eq(start_var,start))
299         val thm' = CONV_RULE (BINDER_CONV
300                      (LHS_CONV (REWRITE_CONV
301                        [GSYM finals_def, GSYM table_def, GSYM start_def])))
302                      thm
303         val thm'' = LIST_CONJ [thm',table_def, finals_def, start_def]
304     in
305       Hol_pp.thm_to_string thm''
306     end
307
308fun deconstruct {certificate, final, matchfn, start, table} =
309 let fun toList V = List.map (curry Vector.sub V) (upto 0 (Vector.length V - 1))
310 in (certificate,start, toList final, toList (Vector.map toList table))
311 end;
312
313(*---------------------------------------------------------------------------*)
314(* Parse, transform, write out code.                                         *)
315(*---------------------------------------------------------------------------*)
316
317datatype lang = Ada | C | Java | ML | Thm;
318
319fun parse_args () =
320 let fun to_lang s =
321      if Lib.mem s ["Ada","ada"] then SOME Ada else
322      if Lib.mem s ["C","c"] then SOME C else
323      if Lib.mem s ["Java","java"] then SOME Java else
324      if Lib.mem s ["ML","SML","ml","sml"] then SOME ML else
325      if Lib.mem s ["THM","Thm","thm"] then SOME Thm
326      else NONE
327     fun check(J,lstring,name,string) =
328       case to_lang lstring
329        of SOME lang => SOME(J,lang,name,string)
330         | NONE => NONE
331 in
332    case CommandLine.arguments()
333     of ["-dfagen","SML",lstring,name,string] => check(regexpLib.SML,lstring,name,string)
334      | ["-dfagen","HOL",lstring,name,string] => check(regexpLib.HOL,lstring,name,string)
335      | [lstring,name,string] => check(justifyDefault, lstring, name,string)
336      | otherwise  => NONE
337 end
338
339fun main () =
340 let fun printHelp() = stdErr_print (String.concat
341      ["Usage: regexp2dfa [-dfagen (HOL | SML)] ",
342       "(Ada | C | Java | ML | Thm) <name> '<regexp>'\n"])
343     fun parse_regexp s =
344       Regexp_Type.fromString s handle e => failwithERR e
345     fun compile_regexp J r =
346       regexpLib.matcher J r handle e => failwithERR e
347 in
348    stdErr_print "regexp2dfa: \n"
349(*  ; check_compset() *)
350  ; case parse_args()
351    of NONE => (printHelp(); regexpMisc.fail())
352     | SOME (justify,lang,name,rstring) =>
353      let val regexp = parse_regexp rstring
354          val _ = stdErr_print "Parsed regexp, now constructing DFA ... "
355          val result = compile_regexp justify regexp
356          val file_string =
357            (case lang
358              of Ada  => Adafile
359               | C    => Cfile
360               | ML   => MLfile
361               | Java => Javafile
362               | Thm  => HOLfile)
363            name rstring (deconstruct result)
364      in
365         stdOut_print file_string
366       ; regexpMisc.succeed()
367      end
368end;
369