1open Lib regexpMisc regexpLib; 2 3val justifyDefault = regexpLib.SML; 4 5val ERR = Feedback.mk_HOL_ERR "regexp2dfa"; 6 7fun failwithERR e = 8 (stdErr_print (Feedback.exn_to_string e); 9 regexpMisc.fail()); 10 11fun dest_string "" = raise ERR "dest_string" "empty string" 12 | dest_string str = 13 let val c = String.sub(str,0) 14 val t = String.substring(str,1,String.size str - 1) 15 in (c,t) 16 end 17 18fun Upper str = 19 let open Char 20 val (c,t) = dest_string str 21 in if isUpper c then str 22 else String.concat [toString(toUpper c), t] 23 end; 24 25fun check_compset() = 26 let fun join (s1,s2) = s2^"."^s1 27 in case computeLib.unmapped (regexpLib.regexp_compset()) 28 of [] => () 29 | check_these => 30 (stdErr_print "Unmapped consts in regexp_compset: \n "; 31 stdErr_print (String.concat 32 (spreadlnWith {sep=", ", ln = "\n ", width = 5} 33 join check_these)); 34 stdErr_print "\n\n") 35 end 36 37 38(*---------------------------------------------------------------------------*) 39(* Ada code *) 40(*---------------------------------------------------------------------------*) 41 42local 43 fun finalsString nstates list = 44 let val spreadList = 45 spreadlnWith {sep=",", ln="\n ", width=10} Bool.toString list 46 in 47 String.concat 48 ["ACCEPTING : constant array (0 .. ", Int.toString (nstates-1), ") of Boolean :=", 49 "\n (", String.concat spreadList, ");" 50 ] 51 end; 52 53fun array256String intList = 54 let val spreadList = 55 spreadlnWith {sep=",", ln="\n ", width=31} Int.toString intList 56 in 57 String.concat ("(":: spreadList @ [")"]) 58 end 59 60fun twoDarrayString nstates intLists = 61 let val arrays = map array256String intLists 62 in String.concat 63 ["TABLE : constant array (0 .. ", 64 Int.toString (nstates-1), ", 0 .. 255) of Integer :=", 65 "\n (", String.concat (spread ",\n " arrays), ");" 66 ] 67 end; 68in 69fun Adafile name quote (_,_,finals,table) = 70 let val _ = stdErr_print "Generating code.\n" 71 val nstates = List.length finals 72 val Uname = Upper name handle e => failwithERR e 73 in String.concat 74 ["procedure ", Uname, "\nis\n\n", 75 "-----------------------------------------------------------------------------\n", 76 "-- The following DFA ", name, " is the compiled form of regexp\n", 77 "--\n", 78 "-- ", quote, "\n", 79 "--\n", 80 "-- Number of states in DFA: ",Int.toString nstates, "\n", 81 "--\n", 82 "-------------------------------------------------------------------------------\n", 83 "\n", 84 finalsString nstates finals, "\n\n", 85 twoDarrayString nstates table, "\n\n", 86 " function Matcher (S : in String) return Boolean\n", 87 " is\n", 88 " State : Integer := 0;\n", 89 " begin\n", 90 " for I in S'first .. S'last\n", 91 " loop\n", 92 " State := TABLE(State, Character'Pos(S(I)));\n", 93 " end loop;\n", 94 " return ACCEPTING(State);\n", 95 " end Matcher;\n\n", 96 "begin\n\n", 97 " null; -- compiler wants a statement here, so here's a trivial one\n\n", 98 "end ", Uname, ";\n" 99 ] 100 end 101end; 102 103(*---------------------------------------------------------------------------*) 104(* C code *) 105(*---------------------------------------------------------------------------*) 106 107local 108fun bool_to_C true = 1 109 | bool_to_C false = 0; 110 111fun finalsString list = 112 let val spreadList = 113 spreadlnWith {sep=",", ln="\n ", width=31} Int.toString list 114 in 115 String.concat ("{" :: spreadList @ ["}"]) 116 end; 117 118fun array256String intList = 119 let val spreadList = 120 spreadlnWith {sep=",", ln="\n ", width=31} Int.toString intList 121 in 122 String.concat ("{":: spreadList @ ["}"]) 123 end 124 125fun twoDarrayString intLists = 126 let val _ = stdErr_print "Generating code.\n" 127 val arrays = map array256String intLists 128 in String.concat 129 (("{"::spread ",\n " arrays) @ ["};"]) 130 end; 131in 132fun Cfile name quote (_,_,finals,table) = 133 let val nstates = List.length finals 134 val finals = map bool_to_C finals 135 in String.concat 136 ["/*---------------------------------------------------------------------------\n", 137 " * DFA ", name, " is the compiled form of regexp\n", 138 " *\n", 139 " * ", quote, "\n", 140 " * \n", 141 " * Number of states in DFA: ",Int.toString nstates, "\n", 142 " *\n", 143 " *---------------------------------------------------------------------------*/\n", 144 "\n", 145 "int ACCEPTING_",name," [", Int.toString nstates,"] =\n ",finalsString finals, ";\n", 146 "\n", 147 "unsigned long DELTA_",name," [",Int.toString nstates,"] [256] = \n ", 148 twoDarrayString table, 149 "\n\n", 150 "int match_",name,"(unsigned char *s, int len) {\n", 151 " int state, i;\n", 152 "\n", 153 " state = 0;\n", 154 "\n", 155 " for (i = 0; i < len; i++) { \n", 156 " state = DELTA_",name,"[state] [s[i]];\n", 157 " }\n", 158 "\n", 159 " return ACCEPTING_",name,"[state];\n", 160 "}\n" 161 ] 162 end 163end; 164 165(*---------------------------------------------------------------------------*) 166(* ML code *) 167(*---------------------------------------------------------------------------*) 168 169local 170 fun finalsString list = 171 let val spreadList = 172 spreadlnWith {sep=",", ln="\n ", width=10} Bool.toString list 173 in 174 String.concat ("Vector.fromList\n [" :: spreadList @ ["]"]) 175 end; 176 177fun array256String intList = 178 let val spreadList = 179 spreadlnWith {sep=",", ln="\n ", width=31} Int.toString intList 180 in 181 String.concat ("[":: spreadList @ ["]"]) 182 end 183 184fun twoDarrayString intLists = 185 let val arrays = map array256String intLists 186 in String.concat 187 ((" Vector.fromList (List.map Vector.fromList\n [" 188 ::spread ",\n " arrays) @ ["])"]) 189 end; 190in 191fun MLfile name quote (_,_,finals,table) = 192 let val _ = stdErr_print "Generating code.\n" 193 val nstates = List.length finals 194 in String.concat 195 ["(*---------------------------------------------------------------------------\n", 196 " * DFA ", name, " is the compiled form of regexp\n", 197 " *\n", 198 " * ", quote, "\n", 199 " *\n", 200 " * Number of states in DFA: ",Int.toString nstates, "\n", 201 " *\n", 202 " *---------------------------------------------------------------------------*)\n", 203 "\n", 204 "val ", name, " = let\n", 205 " val FINALS =\n ",finalsString finals, "\n ", 206 " val DELTA =\n ", twoDarrayString table, "\n ", 207 " fun step state char = Vector.sub(Vector.sub(DELTA,state),Char.ord char)\n", 208 " in\n", 209 " fn s =>\n", 210 " let val len = String.size s\n", 211 " fun steps state i =\n", 212 " if i = len then state\n", 213 " else steps (step state (String.sub(s,i))) (i+1)\n", 214 " in\n", 215 " Vector.sub(FINALS,steps 0 0)\n", 216 " end\n", 217 " end;\n" 218 ] 219 end 220end; 221 222 223(*---------------------------------------------------------------------------*) 224(* Java code *) 225(*---------------------------------------------------------------------------*) 226 227local 228fun finalsString list = 229 let val spreadList = 230 spreadlnWith {sep=",", ln="\n ", width=10} Bool.toString list 231 in 232 String.concat ("{" :: spreadList @ ["}"]) 233 end; 234 235fun array256String intList = 236 let val spreadList = 237 spreadlnWith {sep=",", ln="\n ", width=31} Int.toString intList 238 in 239 String.concat ("{":: spreadList @ ["}"]) 240 end 241 242fun twoDarrayString intLists = 243 let val arrays = map array256String intLists 244 in String.concat 245 ((" {"::spread ",\n " arrays) @ ["};"]) 246 end; 247in 248fun Javafile name quote (_,_,finals,table) = 249 let val _ = stdErr_print "Generating code.\n" 250 val nstates = List.length finals 251 val Uname = Upper name handle e => failwithERR e 252 in String.concat 253 ["/*---------------------------------------------------------------------------\n", 254 " -- DFA ", name, " is the compiled form of regexp\n", 255 " --\n", 256 " -- ", quote, "\n", 257 " --\n", 258 " -- Number of states in DFA: ",Int.toString nstates, "\n", 259 " --\n", 260 " *---------------------------------------------------------------------------*/\n", 261 "\n", 262 "public class ", Uname, " {\n", 263 " boolean ACCEPTING_",name," [] =\n ",finalsString finals, ";\n", 264 "\n", 265 " int DELTA_",name,"[][] = \n", 266 twoDarrayString table, 267 "\n\n", 268 " boolean match_",name,"(String s) {\n", 269 " int state = 0;\n", 270 "\n", 271 " for (int i = 0; i < s.length(); i++) { \n", 272 " state = DELTA_",name,"[state][s.charAt(i)];\n", 273 " }\n", 274 "\n", 275 " return ACCEPTING_",name,"[state];\n", 276 " }\n", 277 "}\n" 278 ] 279 end 280end; 281 282fun HOLfile name quote (certificate,_,finals,table) = 283 case certificate 284 of NONE => "" 285 | SOME thm => 286 let open HolKernel Drule boolLib bossLib 287 val _ = stdErr_print "Generating theorem.\n" 288 val eqn = snd(dest_forall(concl thm)) 289 val (exec_dfa,[finals,table,start,s]) = strip_comb(lhs eqn) 290 val name_finals = name^"_finals" 291 val name_table = name^"_table" 292 val name_start = name^"_start" 293 val finals_var = mk_var(name_finals,type_of finals) 294 val table_var = mk_var(name_table,type_of table) 295 val start_var = mk_var(name_start,type_of start) 296 val finals_def = new_definition(name_finals,mk_eq(finals_var,finals)) 297 val table_def = new_definition(name_table,mk_eq(table_var,table)) 298 val start_def = new_definition(name_start,mk_eq(start_var,start)) 299 val thm' = CONV_RULE (BINDER_CONV 300 (LHS_CONV (REWRITE_CONV 301 [GSYM finals_def, GSYM table_def, GSYM start_def]))) 302 thm 303 val thm'' = LIST_CONJ [thm',table_def, finals_def, start_def] 304 in 305 Hol_pp.thm_to_string thm'' 306 end 307 308fun deconstruct {certificate, final, matchfn, start, table} = 309 let fun toList V = List.map (curry Vector.sub V) (upto 0 (Vector.length V - 1)) 310 in (certificate,start, toList final, toList (Vector.map toList table)) 311 end; 312 313(*---------------------------------------------------------------------------*) 314(* Parse, transform, write out code. *) 315(*---------------------------------------------------------------------------*) 316 317datatype lang = Ada | C | Java | ML | Thm; 318 319fun parse_args () = 320 let fun to_lang s = 321 if Lib.mem s ["Ada","ada"] then SOME Ada else 322 if Lib.mem s ["C","c"] then SOME C else 323 if Lib.mem s ["Java","java"] then SOME Java else 324 if Lib.mem s ["ML","SML","ml","sml"] then SOME ML else 325 if Lib.mem s ["THM","Thm","thm"] then SOME Thm 326 else NONE 327 fun check(J,lstring,name,string) = 328 case to_lang lstring 329 of SOME lang => SOME(J,lang,name,string) 330 | NONE => NONE 331 in 332 case CommandLine.arguments() 333 of ["-dfagen","SML",lstring,name,string] => check(regexpLib.SML,lstring,name,string) 334 | ["-dfagen","HOL",lstring,name,string] => check(regexpLib.HOL,lstring,name,string) 335 | [lstring,name,string] => check(justifyDefault, lstring, name,string) 336 | otherwise => NONE 337 end 338 339fun main () = 340 let fun printHelp() = stdErr_print (String.concat 341 ["Usage: regexp2dfa [-dfagen (HOL | SML)] ", 342 "(Ada | C | Java | ML | Thm) <name> '<regexp>'\n"]) 343 fun parse_regexp s = 344 Regexp_Type.fromString s handle e => failwithERR e 345 fun compile_regexp J r = 346 regexpLib.matcher J r handle e => failwithERR e 347 in 348 stdErr_print "regexp2dfa: \n" 349(* ; check_compset() *) 350 ; case parse_args() 351 of NONE => (printHelp(); regexpMisc.fail()) 352 | SOME (justify,lang,name,rstring) => 353 let val regexp = parse_regexp rstring 354 val _ = stdErr_print "Parsed regexp, now constructing DFA ... " 355 val result = compile_regexp justify regexp 356 val file_string = 357 (case lang 358 of Ada => Adafile 359 | C => Cfile 360 | ML => MLfile 361 | Java => Javafile 362 | Thm => HOLfile) 363 name rstring (deconstruct result) 364 in 365 stdOut_print file_string 366 ; regexpMisc.succeed() 367 end 368end; 369