1structure DFA_Codegen :> DFA_Codegen =
2struct
3
4open Lib Feedback regexpMisc;
5
6val ERR = Feedback.mk_HOL_ERR  "DFA_Codegen";
7
8fun dest_string "" = raise ERR "dest_string" "empty string"
9  | dest_string str =
10     let val c = String.sub(str,0)
11         val t = String.substring(str,1,String.size str - 1)
12     in (c,t)
13     end
14
15fun Upper str =
16 let open Char
17     val (c,t) = dest_string str
18 in if isUpper c then str
19    else String.concat [toString(toUpper c), t]
20 end;
21
22 type dfa =
23   {name       : string,
24    src_regexp : string,
25    finals     : bool list,
26    table      : int list list}
27
28(*---------------------------------------------------------------------------*)
29(* Ada code                                                                  *)
30(*---------------------------------------------------------------------------*)
31
32local
33 fun finalsString nstates list =
34 let val spreadList =
35      spreadlnWith {sep=",", ln="\n     ", width=10} Bool.toString list
36 in
37  String.concat
38   ["ACCEPTING : constant array (0 .. ", Int.toString (nstates-1), ") of Boolean :=",
39    "\n    (", String.concat spreadList, ");"
40   ]
41 end;
42
43fun array256String intList =
44 let val spreadList =
45      spreadlnWith {sep=",", ln="\n     ", width=31} Int.toString intList
46 in
47   String.concat ("(":: spreadList @ [")"])
48 end
49
50fun twoDarrayString nstates intLists =
51  let val arrays = map array256String intLists
52  in String.concat
53      ["TABLE : constant array (0 .. ",
54       Int.toString (nstates-1), ", 0 .. 255) of Integer :=",
55       "\n    (",  String.concat (spread ",\n    " arrays), ");"
56      ]
57  end;
58in
59fun Ada {name,src_regexp,finals,table} ostrm =
60 let val _ = stdErr_print "Generating Ada code.\n"
61     val nstates = List.length finals
62     val Uname = Upper name handle e => raise wrap_exn "Ada" "" e
63 val string = String.concat
64 ["procedure ", Uname, "\nis\n\n",
65  "-----------------------------------------------------------------------------\n",
66  "-- The following DFA ", name, " is the compiled form of regexp\n",
67  "--\n",
68  "--   ", src_regexp, "\n",
69  "--\n",
70  "-- Number of states in DFA: ",Int.toString nstates, "\n",
71  "--\n",
72  "-------------------------------------------------------------------------------\n",
73  "\n",
74  finalsString nstates finals, "\n\n",
75  twoDarrayString nstates table, "\n\n",
76  " function Matcher (S : in String) return Boolean\n",
77  " is\n",
78  "   State : Integer := 0;\n",
79  " begin\n",
80  "   for I in S'first .. S'last\n",
81  "    loop\n",
82  "      State := TABLE(State, Character'Pos(S(I)));\n",
83  "    end loop;\n",
84  "   return ACCEPTING(State);\n",
85  " end Matcher;\n\n",
86  "begin\n\n",
87  "  null;  -- compiler wants a statement here, so here's a trivial one\n\n",
88  "end ", Uname, ";\n"
89  ]
90in
91 TextIO.output(ostrm,string)
92end
93end;
94
95(*---------------------------------------------------------------------------*)
96(* C code                                                                    *)
97(*---------------------------------------------------------------------------*)
98
99local
100fun bool_to_C true = 1
101  | bool_to_C false = 0;
102
103fun finalsString list =
104 let val spreadList =
105        spreadlnWith {sep=",", ln="\n  ", width=31} Int.toString list
106 in
107   String.concat ("{" :: spreadList @ ["}"])
108 end;
109
110fun array256String intList =
111 let val spreadList =
112      spreadlnWith {sep=",", ln="\n   ", width=31} Int.toString intList
113 in
114   String.concat ("{":: spreadList @ ["}"])
115 end
116
117fun twoDarrayString intLists =
118  let val _ = stdErr_print "Generating C code.\n"
119      val arrays = map array256String intLists
120  in String.concat
121      (("{"::spread ",\n " arrays) @ ["};"])
122  end;
123in
124fun C {name,src_regexp,finals,table} ostrm =
125 let val nstates = List.length finals
126     val finals = map bool_to_C finals
127 val string = String.concat
128 ["/*---------------------------------------------------------------------------\n",
129  " * DFA ", name, " is the compiled form of regexp\n",
130  " *\n",
131  " *    ", src_regexp, "\n",
132  " * \n",
133  " * Number of states in DFA: ",Int.toString nstates, "\n",
134  " *\n",
135  " *---------------------------------------------------------------------------*/\n",
136  "\n",
137  "int ACCEPTING_",name," [", Int.toString nstates,"] =\n ",finalsString finals, ";\n",
138  "\n",
139  "unsigned long DELTA_",name," [",Int.toString nstates,"] [256] = \n ",
140  twoDarrayString table,
141  "\n\n",
142  "int match_",name,"(unsigned char *s, int len) {\n",
143  "  int state, i;\n",
144  "\n",
145  "  state = 0;\n",
146  "\n",
147  "  for (i = 0; i < len; i++) { \n",
148  "    state = DELTA_",name,"[state] [s[i]];\n",
149  "   }\n",
150  "\n",
151  "  return ACCEPTING_",name,"[state];\n",
152  "}\n"
153 ]
154 in
155  TextIO.output(ostrm,string)
156 end
157end;
158
159(*---------------------------------------------------------------------------*)
160(* ML code                                                                   *)
161(*---------------------------------------------------------------------------*)
162
163local
164 fun finalsString list =
165 let val spreadList =
166        spreadlnWith {sep=",", ln="\n     ", width=10} Bool.toString list
167 in
168   String.concat ("Vector.fromList\n    [" :: spreadList @ ["]"])
169 end;
170
171fun array256String intList =
172 let val spreadList =
173       spreadlnWith {sep=",", ln="\n     ", width=31} Int.toString intList
174 in
175   String.concat ("[":: spreadList @ ["]"])
176 end
177
178fun twoDarrayString intLists =
179  let val arrays = map array256String intLists
180  in String.concat
181      ((" Vector.fromList (List.map Vector.fromList\n    ["
182         ::spread ",\n    " arrays) @ ["])"])
183  end;
184in
185fun SML {name, src_regexp,finals,table} ostrm =
186 let val _ = stdErr_print "Generating SML code.\n"
187     val nstates = List.length finals
188 val string = String.concat
189 ["(*---------------------------------------------------------------------------\n",
190  " * DFA ", name, " is the compiled form of regexp\n",
191  " *\n",
192  " *   ", src_regexp, "\n",
193  " *\n",
194  " * Number of states in DFA: ",Int.toString nstates, "\n",
195  " *\n",
196  " *---------------------------------------------------------------------------*)\n",
197  "\n",
198  "val ", name, " = let\n",
199  " val FINALS =\n  ",finalsString finals, "\n ",
200  " val DELTA =\n  ", twoDarrayString table, "\n ",
201  "  fun step state char = Vector.sub(Vector.sub(DELTA,state),Char.ord char)\n",
202  " in\n",
203  "   fn s =>\n",
204  "    let val len = String.size s\n",
205  "        fun steps state i =\n",
206  "          if i = len then state\n",
207  "          else steps (step state (String.sub(s,i))) (i+1)\n",
208  "    in\n",
209  "      Vector.sub(FINALS,steps 0 0)\n",
210  "    end\n",
211  " end;\n"
212  ]
213 in
214   TextIO.output(ostrm,string)
215 end
216end;
217
218
219(*---------------------------------------------------------------------------*)
220(* Java code. Note: the generated code can run afoul of Java's restriction   *)
221(* the size of a method (64K as of 2019) when the table size gets too big.   *)
222(*---------------------------------------------------------------------------*)
223
224local
225fun finalsString list =
226 let val spreadList =
227       spreadlnWith {sep=",", ln="\n    ", width=10} Bool.toString list
228 in
229   String.concat ("{" :: spreadList @ ["}"])
230 end;
231
232fun array256String intList =
233 let val spreadList =
234       spreadlnWith {sep=",", ln="\n     ", width=31} Int.toString intList
235 in
236   String.concat ("{":: spreadList @ ["}"])
237 end
238
239fun twoDarrayString intLists =
240  let val arrays = map array256String intLists
241  in String.concat
242      (("   {"::spread ",\n    " arrays) @ ["};"])
243  end;
244in
245fun Java {name, src_regexp,finals,table} ostrm =
246 let val _ = stdErr_print "Generating Java code.\n"
247     val nstates = List.length finals
248     val Uname = Upper name handle e => raise wrap_exn "Java" "" e
249 val string = String.concat
250 ["/*---------------------------------------------------------------------------\n",
251  " -- DFA ", name, " is the compiled form of regexp\n",
252  " --\n",
253  " --   ", src_regexp, "\n",
254  " --\n",
255  " -- Number of states in DFA: ",Int.toString nstates, "\n",
256  " --\n",
257  " *---------------------------------------------------------------------------*/\n",
258  "\n",
259  "public class ", Uname, " {\n",
260  "  boolean ACCEPTING_",name," [] =\n   ",finalsString finals, ";\n",
261  "\n",
262  "  int DELTA_",name,"[][] = \n",
263  twoDarrayString table,
264  "\n\n",
265  "  boolean match_",name,"(byte[] input) {\n",
266  "    int state = 0;\n",
267  "\n",
268  "    for (int i = 0; i < input.length; i++) { \n",
269  "      state = DELTA_",name,"[state][Byte.toUnsignedInt(input[i])];\n",
270  "     }\n",
271  "\n",
272  "    return ACCEPTING_",name,"[state];\n",
273  "  }\n",
274  "}\n"
275 ]
276 in
277  TextIO.output(ostrm,string)
278 end
279end;
280
281
282end
283