1(*---------------------------------------------------------------------------*) 2(* Regular expressions and a regexp matcher. *) 3(* Originated from Konrad Slind, tweaked by MJCG for Accellera PSL SEREs *) 4(* An automata-based matcher added by Joe Hurd *) 5(*---------------------------------------------------------------------------*) 6 7structure regexpTools :> regexpTools = 8struct 9 10(* 11app load ["bossLib", "metisLib", "stringLib", "matcherTheory"]; 12*) 13 14open HolKernel Parse boolLib; 15open bossLib metisLib pairTheory combinTheory listTheory rich_listTheory 16 pred_setTheory arithmeticTheory; 17open regexpTheory matcherTheory; 18 19(*---------------------------------------------------------------------------*) 20(* Tracing. *) 21(*---------------------------------------------------------------------------*) 22 23(****************************************************************************** 24* The trace levels of the regular expression library: 25* 0: silent 26* 1: 1 character (either - or +) for each list element processed 27* 2: matches as they are discovered 28* 3: transitions as they are calculated 29* 4: internal state of the automata 30******************************************************************************) 31 32val trace_level = ref 1; 33val () = register_trace ("regexpTools", trace_level, 4); 34fun chatting n = n <= !trace_level; 35fun chat n s = if chatting n then Lib.say s else (); 36 37(*---------------------------------------------------------------------------*) 38(* Terms. *) 39(*---------------------------------------------------------------------------*) 40 41val initial_regexp2na_tm = ``initial_regexp2na : 'a regexp -> num``; 42val accept_regexp2na_tm = ``accept_regexp2na : 'a regexp -> num -> bool``; 43val transition_regexp2na_tm = 44 ``transition_regexp2na : 'a regexp -> num -> 'a -> num -> bool``; 45val eval_accepts_tm = ``eval_accepts : 'a regexp -> num list -> bool``; 46val eval_transitions_tm = 47 ``eval_transitions : 'a regexp -> num list -> 'a -> num list``; 48val exists_transition_regexp2na_tm = 49 ``exists_transition_regexp2na : 'a regexp -> num -> num -> bool``; 50val areport_tm = ``areport : 'a -> 'b -> 'b``; 51 52(*---------------------------------------------------------------------------*) 53(* Function caches. *) 54(*---------------------------------------------------------------------------*) 55 56fun cache order f = 57 let 58 val cache = ref (Binarymap.mkDict order) 59 in 60 fn x => 61 case Binarymap.peek (!cache, x) of 62 SOME y => (true, y) 63 | NONE => 64 let 65 val y = f x 66 val () = cache := Binarymap.insert (!cache, x, y) 67 in 68 (false, y) 69 end 70 end; 71 72(*---------------------------------------------------------------------------*) 73(* Executing the semantic-driven matcher. *) 74(*---------------------------------------------------------------------------*) 75 76val () = computeLib.add_funs [LAST_DEF]; 77 78(*---------------------------------------------------------------------------*) 79(* Executing the automata matcher. *) 80(*---------------------------------------------------------------------------*) 81 82fun cache_conv m n conv = 83 let 84 val cconv = cache compare conv 85 in 86 fn tm => 87 let 88 val (hit, th) = cconv tm 89 val _ = chat m (if hit then "+" else "-") 90 val _ = if chatting n then Lib.say ("\n" ^ thm_to_string th) else () 91 in 92 th 93 end 94 end; 95 96local 97 val t_or = CONJUNCT1 (SPEC_ALL OR_CLAUSES); 98 99 fun witness_conv tm = 100 let 101 val () = if not (chatting 4) then () 102 else Lib.say ("\nwitness_conv: " ^ term_to_string tm) 103 val (x,b) = dest_exists tm 104 val (ty,_) = dom_rng (type_of x) 105 val conjs = strip_conj b 106 val emp_tm = inst [alpha |-> ty] pred_setSyntax.empty_tm 107 val ins_tm = inst [alpha |-> ty] pred_setSyntax.insert_tm 108 fun ins (v,s) = mk_comb (mk_comb (ins_tm, rand (rator v)), s) 109 val set = foldr ins emp_tm (filter (not o is_neg) conjs) 110 val goal = subst [x |-> set] b 111 val wit_th = EQT_ELIM (EVAL goal) 112 val ex_th = EXISTS (tm, set) wit_th 113 in 114 EQT_INTRO ex_th 115 end; 116 117 val sat_conv = 118 QUANT_CONV normalForms.DNF_CONV 119 THENC (REWR_CONV boolTheory.EXISTS_SIMP 120 ORELSEC (EXISTS_OR_CONV 121 THENC LAND_CONV witness_conv 122 THENC REWR_CONV t_or) 123 ORELSEC witness_conv); 124in 125 fun set_sat_conv tm = 126 let 127 val () = if not (chatting 4) then () 128 else Lib.say ("\nset_sat_conv: " ^ term_to_string tm) 129 in 130 sat_conv tm 131 handle e as HOL_ERR _ => Raise e 132 end; 133 end; 134 135local 136 val chr_ss = simpLib.++ (boolSimps.bool_ss, numSimps.REDUCE_ss); 137in 138 val chr_sat_conv = 139 QUANT_CONV normalForms.DNF_CONV 140 THENC SIMP_CONV chr_ss [chr_11, chr_suff, chr_suff1]; 141end; 142 143val prefix_sat_conv = ref set_sat_conv; 144 145local 146 fun exists_conv tm = 147 (ONCE_REWRITE_CONV [exists_transition_regexp2na_def] 148 THENC QUANT_CONV EVAL 149 THENC !prefix_sat_conv) tm; 150in 151 val exists_transition_regexp2na_conv = cache_conv 3 4 exists_conv; 152end; 153 154val initial_regexp2na_conv = 155 cache_conv 3 4 (ONCE_REWRITE_CONV [initial_regexp2na] THENC EVAL); 156 157val accept_regexp2na_conv = 158 cache_conv 3 4 (ONCE_REWRITE_CONV [accept_regexp2na] THENC EVAL); 159 160val transition_regexp2na_conv = 161 cache_conv 3 4 (ONCE_REWRITE_CONV [transition_regexp2na] THENC EVAL); 162 163val eval_accepts_conv = 164 cache_conv 2 3 (ONCE_REWRITE_CONV [eval_accepts_def] THENC EVAL); 165 166val eval_transitions_conv = 167 cache_conv 1 3 (ONCE_REWRITE_CONV [eval_transitions_def] THENC EVAL); 168 169local 170 fun hol_rev tm = 171 let val (l,ty) = listSyntax.dest_list tm 172 in listSyntax.mk_list (rev l, ty) 173 end; 174in 175 fun areport_conv tm = 176 let 177 val l = hol_rev (rand (rator tm)) 178 val () = if not (chatting 2) then () 179 else Lib.say ("\nmatch: " ^ term_to_string l) 180 in 181 REWR_CONV areport_def 182 end tm; 183end; 184 185val () = computeLib.add_funs 186 [(* Prefer the cached conversions of 187 initial_regexp2na, accept_regexp2na, transition_regexp2na, 188 eval_accepts, eval_transitions *) 189 matcherTheory.astep_def, 190 matcherTheory.dijkstra_def]; 191 192val () = computeLib.add_convs 193 [(initial_regexp2na_tm, 1, initial_regexp2na_conv), 194 (accept_regexp2na_tm, 2, accept_regexp2na_conv), 195 (transition_regexp2na_tm, 4, transition_regexp2na_conv), 196 (eval_accepts_tm, 2, eval_accepts_conv), 197 (eval_transitions_tm, 3, eval_transitions_conv), 198 (exists_transition_regexp2na_tm, 3, exists_transition_regexp2na_conv), 199 (areport_tm, 2, areport_conv)]; 200 201(*---------------------------------------------------------------------------*) 202(* Speed up the evaluation of very long lists. *) 203(*---------------------------------------------------------------------------*) 204 205local 206 val dropize = 207 (CONV_RULE o LAND_CONV o ONCE_REWRITE_CONV) [GSYM (CONJUNCT1 drop_def)]; 208 209 fun dest_single l = 210 let 211 val (h,t) = listSyntax.dest_cons l 212 val _ = listSyntax.is_nil t orelse raise ERR "dest_single" "" 213 in 214 h 215 end; 216 217 val is_single = can dest_single; 218 219 val reduce = CONV_RULE (LAND_CONV reduceLib.REDUCE_CONV); 220 221 fun loop acc th = 222 let 223 val acc = MATCH_MP head_drop th :: acc 224 in 225 if is_single (snd (dest_eq (concl th))) then 226 CONV_RULE reduceLib.REDUCE_CONV (MATCH_MP length_drop th) :: acc 227 else loop acc (reduce (MATCH_MP tail_drop th)) 228 end; 229in 230 fun EVAL_BIGLIST def = let val def = dropize def in loop [def] def end; 231end; 232 233(*---------------------------------------------------------------------------*) 234(* Export a regular expression as a Verilog state machine. *) 235(*---------------------------------------------------------------------------*) 236 237val LINE_LENGTH = ref 79; 238 239local fun dup _ 0 l = l | dup x n l = dup x (n - 1) (x :: l); 240in fun chs x n = implode (dup x n []); 241end; 242 243fun DISCH_CONJ [] = I 244 | DISCH_CONJ [x] = DISCH x 245 | DISCH_CONJ (x :: (xs as _ :: _)) = 246 CONV_RULE (REWR_CONV AND_IMP_INTRO) o DISCH x o DISCH_CONJ xs; 247 248fun all_subsets [] = [[]] 249 | all_subsets (x :: xs) = 250 let val y = all_subsets xs 251 in map (cons (x,true)) y @ map (cons (x,false)) y 252 end; 253 254val dest_in = dest_binop ``(IN)`` (ERR "dest_in" ""); 255 256fun mk_alph s = 257 let 258 val t = map (rhs o concl o EVAL) s 259 val ts = zip t s 260 in 261 fn x => assoc x ts 262 end; 263 264datatype 'a condition = 265 Leaf of 'a 266| Branch of string * 'a condition * 'a condition; 267 268local 269 val s = ``s : num -> bool``; 270 271 val empty : (term,int) Binarymap.dict = Binarymap.mkDict compare; 272 273 fun member (m : (term,int) Binarymap.dict) t = 274 Option.isSome (Binarymap.peek (m, t)); 275 276 fun harvest_terms acc [] = acc 277 | harvest_terms acc (Leaf (tm,_) :: l) = harvest_terms (tm :: acc) l 278 | harvest_terms acc (Branch (_,a,b) :: l) = harvest_terms acc (a :: b :: l); 279 280 fun finalize (set : (term,int) Binarymap.dict) = 281 let 282 fun f (Leaf (tm,th)) = Leaf (Binarymap.find (set,tm), th) 283 | f (Branch (c,a,b)) = Branch (c, f a, f b) 284 fun g acc [] = acc 285 | g acc ((i,t,a,c) :: r) = g ((i, t, a, f c) :: acc) r 286 in 287 g [] 288 end; 289 290 fun initial r = 291 let 292 val ty = (hd o snd o dest_type o type_of) r 293 val e = inst [alpha |-> ty] initial_regexp2na_tm 294 val t = 295 mk_comb (e, r) 296 handle HOL_ERR _ => 297 raise Fail 298 ("export_nfa.initial.mk_comb: " ^ 299 " e = " ^ type_to_string (type_of e) ^ 300 ", r = " ^ type_to_string (type_of r)) 301 val res = rhs (concl (EVAL t)) 302 in 303 listSyntax.mk_list ([res], numSyntax.num) 304 end; 305 306 fun accept r s = 307 let 308 val ty = (hd o snd o dest_type o type_of) r 309 val e = inst [alpha |-> ty] eval_accepts_tm 310 val t = 311 list_mk_comb (e, [r, s]) 312 handle HOL_ERR _ => raise Fail "export_nfa.accept.list_mk_comb" 313 val th = EVAL t 314 val res = rhs (concl th) 315 val _ = 316 res = T orelse res = F orelse 317 raise ERR "export_nfa.accept" "couldn't reduce eval_accepts" 318 in 319 (res = T, th) 320 end; 321 322 fun simp tm = 323 let val th = CONV_RULE EVAL (ASSUME tm) 324 in CONV_RULE (RAND_CONV (SIMP_CONV boolSimps.bool_ss [th])) 325 end; 326 327 fun mk_condition (m,d,gen) = 328 let 329 fun g bs = GEN s o SPEC s o gen o DISCH_CONJ (rev bs) 330 331 fun f bs th = 332 let val (_,tm) = dest_eq (concl th) in 333 case total (dest_cond o find_term is_cond) tm of 334 NONE => Leaf (tm, g bs th) 335 | SOME (c,_,_) => 336 let 337 val () = if not (chatting 3) then () 338 else chat 3 ("extract_dfa.mk_condition: th =\n" ^ 339 thm_to_string th ^ "\n") 340 val t = find_term (can d) c 341 val v = d t 342 val s = #Name (dest_thy_const v) 343 val tm = m v 344 val tm' = mk_neg tm 345 in 346 Branch (s, f (tm :: bs) (simp tm th), f (tm' :: bs) (simp tm' th)) 347 end 348 end 349 in 350 f [] 351 end; 352 353 fun trans alph r s = 354 let 355 val ty = (hd o snd o dest_type o type_of) r 356 val x = genvar ty 357 fun mvar t = pred_setSyntax.mk_in (t,x) 358 fun dvar t = 359 let val (a,b) = dest_in t 360 in if b = x then alph a else raise ERR "term_to_bexp.var" "not a var" 361 end 362 val gen = GEN x 363 val e = inst [alpha |-> ty] eval_transitions_tm 364 val t = 365 list_mk_comb (e, [r, s, x]) 366 handle HOL_ERR _ => 367 raise Fail 368 ("export_nfa.trans.list_mk_comb:" ^ 369 " e = " ^ type_to_string (type_of e) ^ 370 ", r = " ^ type_to_string (type_of r) ^ 371 ", s = " ^ type_to_string (type_of s) ^ 372 ", x = " ^ type_to_string (type_of x)) 373 val th = EVAL t 374 val () = if not (chatting 3) then () 375 else chat 3 ("extract_dfa.trans: th =\n"^thm_to_string th^"\n") 376 in 377 mk_condition (mvar,dvar,gen) th 378 end; 379 380 fun export _ _ _ set acc [] = finalize set acc 381 | export alph r trans set acc (tm :: rest) = 382 if member set tm then export alph r trans set acc rest else 383 let 384 val i = Binarymap.numItems set 385 val set = Binarymap.insert (set, tm, i) 386 val a = accept r tm 387 val c = trans alph r tm 388 val rest = harvest_terms rest [c] 389 val acc = (i, tm, a, c) :: acc 390 in 391 export alph r trans set acc rest 392 end; 393in 394 fun extract_dfa alph r = 395 export (mk_alph alph) r trans empty [] [initial r]; 396end; 397 398local 399 fun separator s = 400 let 401 val seps = !LINE_LENGTH - 4 - (size s + 2) 402 val m = seps div 2 - 3 403 val n = seps - m 404 in 405 chs #"-" m ^ " " ^ s ^ " " ^ chs #"-" n 406 end; 407 408 fun claim r = PP.pp_to_string (!LINE_LENGTH - 3) pp_term r; 409 410 fun comment s = 411 String.concat 412 (map (fn x => "// " ^ x ^ "\n") (String.fields (fn c => c = #"\n") s)); 413in 414 fun header n r = comment (separator n ^ "\n" ^ claim r); 415end; 416 417local 418 fun log2 n = 419 Int.toString (Real.ceil (Math.ln (Real.fromInt n) / Math.ln 2.0) - 1); 420 421 fun width l = "[" ^ log2 (length l) ^ ":0]"; 422 423 open PP; 424 425 fun pp_alphs _ _ [] = raise ERR "verilog_dfa" "empty alphabet" 426 | pp_alphs s pp (h :: t) = 427 (add_string pp h; 428 app (fn a => (add_string pp s; add_break pp (1,0); add_string pp a)) t); 429 430 fun pp_condition pp (Leaf (i,_)) = 431 add_string pp ("state = " ^ Int.toString i ^ ";") 432 | pp_condition pp (Branch (c,a,b)) = 433 (begin_block pp CONSISTENT 0; 434 begin_block pp CONSISTENT 2; 435 add_string pp ("if (" ^ c ^ ")"); 436 add_break pp (1,0); 437 pp_condition pp a; 438 end_block pp; 439 add_newline pp; 440 begin_block pp CONSISTENT 2; 441 add_string pp "else"; 442 add_break pp (1,0); 443 pp_condition pp b; 444 end_block pp; 445 end_block pp); 446 447 fun pp_case pp name (i,_,(a,_),t) = 448 (begin_block pp CONSISTENT 2; 449 add_string pp (Int.toString i ^ ":"); 450 add_break pp (1,0); 451 if a then 452 add_string pp 453 ("begin $display (\"" ^ name ^ 454 ": property violated!\"); $finish; end") 455 else pp_condition pp t; 456 end_block pp; 457 add_newline pp; 458 add_newline pp) 459 460 fun pp_module pp (name,alph,table) = 461 (begin_block pp CONSISTENT 0; 462 463 begin_block pp INCONSISTENT (size ("module " ^ name ^ " (")); 464 add_string pp ("module " ^ name ^ " ("); 465 pp_alphs "," pp (alph (*@ ["state"]*)); 466 add_string pp ");"; 467 end_block pp; 468 add_newline pp; 469 add_newline pp; 470 471 begin_block pp INCONSISTENT (size "output" + size (width table) + 2); 472 add_string pp ("input" ^ chs #" " (size (width table) + 3)); 473 pp_alphs "," pp alph; 474 add_string pp ";"; 475 end_block pp; 476 add_newline pp; 477 478(* 479 add_string pp ("output " ^ width table ^ " state;"); 480 add_newline pp; 481*) 482 add_string pp ("reg " ^ width table ^ " state;"); 483 add_newline pp; 484 add_newline pp; 485 486 add_string pp "initial state = 0;"; 487 add_newline pp; 488 add_newline pp; 489 490 begin_block pp INCONSISTENT (size "always @ ("); 491 add_string pp "always @ ("; 492 pp_alphs " or" pp alph; 493 add_string pp ")"; 494 end_block pp; 495 add_newline pp; 496 begin_block pp INCONSISTENT 2; 497 add_string pp "begin"; 498 add_newline pp; 499 add_string pp ("$display (\"" ^ name ^ ": state = %0d\", state);"); 500 add_newline pp; 501 begin_block pp INCONSISTENT 2; 502 add_string pp "case (state)"; 503 add_newline pp; 504 app (pp_case pp name) table; 505 add_string pp 506 ("default: begin $display (\"" ^ name ^ 507 ": unknown state\"); $finish; end"); 508 end_block pp; 509 add_newline pp; 510 add_string pp "endcase"; 511 end_block pp; 512 add_newline pp; 513 add_string pp "end"; 514 add_newline pp; 515 add_newline pp; 516 517 add_string pp "endmodule"; 518 add_newline pp; 519 520 end_block pp); 521in 522 fun module n a r = 523 pp_to_string (!LINE_LENGTH) pp_module 524 (n, map (#Name o dest_thy_const) a, extract_dfa a r); 525end; 526 527fun verilog_dfa {name = n, alphabet = a, regexp = r} = 528 header n r ^ "\n" ^ module n a r; 529 530end