1structure DFA_Codegen :> DFA_Codegen = 2struct 3 4open Lib Feedback regexpMisc; 5 6val ERR = Feedback.mk_HOL_ERR "DFA_Codegen"; 7 8fun dest_string "" = raise ERR "dest_string" "empty string" 9 | dest_string str = 10 let val c = String.sub(str,0) 11 val t = String.substring(str,1,String.size str - 1) 12 in (c,t) 13 end 14 15fun Upper str = 16 let open Char 17 val (c,t) = dest_string str 18 in if isUpper c then str 19 else String.concat [toString(toUpper c), t] 20 end; 21 22 type dfa = 23 {name : string, 24 src_regexp : string, 25 finals : bool list, 26 table : int list list} 27 28(*---------------------------------------------------------------------------*) 29(* Ada code *) 30(*---------------------------------------------------------------------------*) 31 32local 33 fun finalsString nstates list = 34 let val spreadList = 35 spreadlnWith {sep=",", ln="\n ", width=10} Bool.toString list 36 in 37 String.concat 38 ["ACCEPTING : constant array (0 .. ", Int.toString (nstates-1), ") of Boolean :=", 39 "\n (", String.concat spreadList, ");" 40 ] 41 end; 42 43fun array256String intList = 44 let val spreadList = 45 spreadlnWith {sep=",", ln="\n ", width=31} Int.toString intList 46 in 47 String.concat ("(":: spreadList @ [")"]) 48 end 49 50fun twoDarrayString nstates intLists = 51 let val arrays = map array256String intLists 52 in String.concat 53 ["TABLE : constant array (0 .. ", 54 Int.toString (nstates-1), ", 0 .. 255) of Integer :=", 55 "\n (", String.concat (spread ",\n " arrays), ");" 56 ] 57 end; 58in 59fun Ada {name,src_regexp,finals,table} ostrm = 60 let val _ = stdErr_print "Generating Ada code.\n" 61 val nstates = List.length finals 62 val Uname = Upper name handle e => raise wrap_exn "Ada" "" e 63 val string = String.concat 64 ["procedure ", Uname, "\nis\n\n", 65 "-----------------------------------------------------------------------------\n", 66 "-- The following DFA ", name, " is the compiled form of regexp\n", 67 "--\n", 68 "-- ", src_regexp, "\n", 69 "--\n", 70 "-- Number of states in DFA: ",Int.toString nstates, "\n", 71 "--\n", 72 "-------------------------------------------------------------------------------\n", 73 "\n", 74 finalsString nstates finals, "\n\n", 75 twoDarrayString nstates table, "\n\n", 76 " function Matcher (S : in String) return Boolean\n", 77 " is\n", 78 " State : Integer := 0;\n", 79 " begin\n", 80 " for I in S'first .. S'last\n", 81 " loop\n", 82 " State := TABLE(State, Character'Pos(S(I)));\n", 83 " end loop;\n", 84 " return ACCEPTING(State);\n", 85 " end Matcher;\n\n", 86 "begin\n\n", 87 " null; -- compiler wants a statement here, so here's a trivial one\n\n", 88 "end ", Uname, ";\n" 89 ] 90in 91 TextIO.output(ostrm,string) 92end 93end; 94 95(*---------------------------------------------------------------------------*) 96(* C code *) 97(*---------------------------------------------------------------------------*) 98 99local 100fun bool_to_C true = 1 101 | bool_to_C false = 0; 102 103fun finalsString list = 104 let val spreadList = 105 spreadlnWith {sep=",", ln="\n ", width=31} Int.toString list 106 in 107 String.concat ("{" :: spreadList @ ["}"]) 108 end; 109 110fun array256String intList = 111 let val spreadList = 112 spreadlnWith {sep=",", ln="\n ", width=31} Int.toString intList 113 in 114 String.concat ("{":: spreadList @ ["}"]) 115 end 116 117fun twoDarrayString intLists = 118 let val _ = stdErr_print "Generating C code.\n" 119 val arrays = map array256String intLists 120 in String.concat 121 (("{"::spread ",\n " arrays) @ ["};"]) 122 end; 123in 124fun C {name,src_regexp,finals,table} ostrm = 125 let val nstates = List.length finals 126 val finals = map bool_to_C finals 127 val string = String.concat 128 ["/*---------------------------------------------------------------------------\n", 129 " * DFA ", name, " is the compiled form of regexp\n", 130 " *\n", 131 " * ", src_regexp, "\n", 132 " * \n", 133 " * Number of states in DFA: ",Int.toString nstates, "\n", 134 " *\n", 135 " *---------------------------------------------------------------------------*/\n", 136 "\n", 137 "int ACCEPTING_",name," [", Int.toString nstates,"] =\n ",finalsString finals, ";\n", 138 "\n", 139 "unsigned long DELTA_",name," [",Int.toString nstates,"] [256] = \n ", 140 twoDarrayString table, 141 "\n\n", 142 "int match_",name,"(unsigned char *s, int len) {\n", 143 " int state, i;\n", 144 "\n", 145 " state = 0;\n", 146 "\n", 147 " for (i = 0; i < len; i++) { \n", 148 " state = DELTA_",name,"[state] [s[i]];\n", 149 " }\n", 150 "\n", 151 " return ACCEPTING_",name,"[state];\n", 152 "}\n" 153 ] 154 in 155 TextIO.output(ostrm,string) 156 end 157end; 158 159(*---------------------------------------------------------------------------*) 160(* ML code *) 161(*---------------------------------------------------------------------------*) 162 163local 164 fun finalsString list = 165 let val spreadList = 166 spreadlnWith {sep=",", ln="\n ", width=10} Bool.toString list 167 in 168 String.concat ("Vector.fromList\n [" :: spreadList @ ["]"]) 169 end; 170 171fun array256String intList = 172 let val spreadList = 173 spreadlnWith {sep=",", ln="\n ", width=31} Int.toString intList 174 in 175 String.concat ("[":: spreadList @ ["]"]) 176 end 177 178fun twoDarrayString intLists = 179 let val arrays = map array256String intLists 180 in String.concat 181 ((" Vector.fromList (List.map Vector.fromList\n [" 182 ::spread ",\n " arrays) @ ["])"]) 183 end; 184in 185fun SML {name, src_regexp,finals,table} ostrm = 186 let val _ = stdErr_print "Generating SML code.\n" 187 val nstates = List.length finals 188 val string = String.concat 189 ["(*---------------------------------------------------------------------------\n", 190 " * DFA ", name, " is the compiled form of regexp\n", 191 " *\n", 192 " * ", src_regexp, "\n", 193 " *\n", 194 " * Number of states in DFA: ",Int.toString nstates, "\n", 195 " *\n", 196 " *---------------------------------------------------------------------------*)\n", 197 "\n", 198 "val ", name, " = let\n", 199 " val FINALS =\n ",finalsString finals, "\n ", 200 " val DELTA =\n ", twoDarrayString table, "\n ", 201 " fun step state char = Vector.sub(Vector.sub(DELTA,state),Char.ord char)\n", 202 " in\n", 203 " fn s =>\n", 204 " let val len = String.size s\n", 205 " fun steps state i =\n", 206 " if i = len then state\n", 207 " else steps (step state (String.sub(s,i))) (i+1)\n", 208 " in\n", 209 " Vector.sub(FINALS,steps 0 0)\n", 210 " end\n", 211 " end;\n" 212 ] 213 in 214 TextIO.output(ostrm,string) 215 end 216end; 217 218 219(*---------------------------------------------------------------------------*) 220(* Java code. Note: the generated code can run afoul of Java's restriction *) 221(* the size of a method (64K as of 2019) when the table size gets too big. *) 222(*---------------------------------------------------------------------------*) 223 224local 225fun finalsString list = 226 let val spreadList = 227 spreadlnWith {sep=",", ln="\n ", width=10} Bool.toString list 228 in 229 String.concat ("{" :: spreadList @ ["}"]) 230 end; 231 232fun array256String intList = 233 let val spreadList = 234 spreadlnWith {sep=",", ln="\n ", width=31} Int.toString intList 235 in 236 String.concat ("{":: spreadList @ ["}"]) 237 end 238 239fun twoDarrayString intLists = 240 let val arrays = map array256String intLists 241 in String.concat 242 ((" {"::spread ",\n " arrays) @ ["};"]) 243 end; 244in 245fun Java {name, src_regexp,finals,table} ostrm = 246 let val _ = stdErr_print "Generating Java code.\n" 247 val nstates = List.length finals 248 val Uname = Upper name handle e => raise wrap_exn "Java" "" e 249 val string = String.concat 250 ["/*---------------------------------------------------------------------------\n", 251 " -- DFA ", name, " is the compiled form of regexp\n", 252 " --\n", 253 " -- ", src_regexp, "\n", 254 " --\n", 255 " -- Number of states in DFA: ",Int.toString nstates, "\n", 256 " --\n", 257 " *---------------------------------------------------------------------------*/\n", 258 "\n", 259 "public class ", Uname, " {\n", 260 " boolean ACCEPTING_",name," [] =\n ",finalsString finals, ";\n", 261 "\n", 262 " int DELTA_",name,"[][] = \n", 263 twoDarrayString table, 264 "\n\n", 265 " boolean match_",name,"(byte[] input) {\n", 266 " int state = 0;\n", 267 "\n", 268 " for (int i = 0; i < input.length; i++) { \n", 269 " state = DELTA_",name,"[state][Byte.toUnsignedInt(input[i])];\n", 270 " }\n", 271 "\n", 272 " return ACCEPTING_",name,"[state];\n", 273 " }\n", 274 "}\n" 275 ] 276 in 277 TextIO.output(ostrm,string) 278 end 279end; 280 281 282end 283