1(*---------------------------------------------------------------------------*)
2(* Regular expressions and a regexp matcher.                                 *)
3(* Originated from Konrad Slind, tweaked by MJCG for Accellera PSL SEREs     *)
4(* An automata-based matcher added by Joe Hurd                               *)
5(*---------------------------------------------------------------------------*)
6
7structure regexpTools :> regexpTools =
8struct
9
10(*
11app load ["bossLib", "metisLib", "stringLib", "matcherTheory"];
12*)
13
14open HolKernel Parse boolLib;
15open bossLib metisLib pairTheory combinTheory listTheory rich_listTheory
16     pred_setTheory arithmeticTheory;
17open regexpTheory matcherTheory;
18
19(*---------------------------------------------------------------------------*)
20(* Tracing.                                                                  *)
21(*---------------------------------------------------------------------------*)
22
23(******************************************************************************
24* The trace levels of the regular expression library:
25* 0: silent
26* 1: 1 character (either - or +) for each list element processed
27* 2: matches as they are discovered
28* 3: transitions as they are calculated
29* 4: internal state of the automata
30******************************************************************************)
31
32val trace_level = ref 1;
33val () = register_trace ("regexpTools", trace_level, 4);
34fun chatting n = n <= !trace_level;
35fun chat n s = if chatting n then Lib.say s else ();
36
37(*---------------------------------------------------------------------------*)
38(* Terms.                                                                    *)
39(*---------------------------------------------------------------------------*)
40
41val initial_regexp2na_tm = ``initial_regexp2na : 'a regexp -> num``;
42val accept_regexp2na_tm = ``accept_regexp2na : 'a regexp -> num -> bool``;
43val transition_regexp2na_tm =
44  ``transition_regexp2na : 'a regexp -> num -> 'a -> num -> bool``;
45val eval_accepts_tm = ``eval_accepts : 'a regexp -> num list -> bool``;
46val eval_transitions_tm =
47  ``eval_transitions : 'a regexp -> num list -> 'a -> num list``;
48val exists_transition_regexp2na_tm =
49  ``exists_transition_regexp2na : 'a regexp -> num -> num -> bool``;
50val areport_tm = ``areport : 'a -> 'b -> 'b``;
51
52(*---------------------------------------------------------------------------*)
53(* Function caches.                                                          *)
54(*---------------------------------------------------------------------------*)
55
56fun cache order f =
57  let
58    val cache = ref (Binarymap.mkDict order)
59  in
60    fn x =>
61    case Binarymap.peek (!cache, x) of
62      SOME y => (true, y)
63    | NONE =>
64        let
65          val y = f x
66          val () = cache := Binarymap.insert (!cache, x, y)
67        in
68          (false, y)
69        end
70  end;
71
72(*---------------------------------------------------------------------------*)
73(* Executing the semantic-driven matcher.                                    *)
74(*---------------------------------------------------------------------------*)
75
76val () = computeLib.add_funs [LAST_DEF];
77
78(*---------------------------------------------------------------------------*)
79(* Executing the automata matcher.                                           *)
80(*---------------------------------------------------------------------------*)
81
82fun cache_conv m n conv =
83  let
84    val cconv = cache compare conv
85  in
86    fn tm =>
87    let
88      val (hit, th) = cconv tm
89      val _ = chat m (if hit then "+" else "-")
90      val _ = if chatting n then Lib.say ("\n" ^ thm_to_string th) else ()
91    in
92      th
93    end
94  end;
95
96local
97  val t_or = CONJUNCT1 (SPEC_ALL OR_CLAUSES);
98
99  fun witness_conv tm =
100    let
101      val () = if not (chatting 4) then ()
102               else Lib.say ("\nwitness_conv: " ^ term_to_string tm)
103      val (x,b) = dest_exists tm
104      val (ty,_) = dom_rng (type_of x)
105      val conjs = strip_conj b
106      val emp_tm = inst [alpha |-> ty] pred_setSyntax.empty_tm
107      val ins_tm = inst [alpha |-> ty] pred_setSyntax.insert_tm
108      fun ins (v,s) = mk_comb (mk_comb (ins_tm, rand (rator v)), s)
109      val set = foldr ins emp_tm (filter (not o is_neg) conjs)
110      val goal = subst [x |-> set] b
111      val wit_th = EQT_ELIM (EVAL goal)
112      val ex_th = EXISTS (tm, set) wit_th
113    in
114      EQT_INTRO ex_th
115    end;
116
117  val sat_conv =
118    QUANT_CONV normalForms.DNF_CONV
119    THENC (REWR_CONV boolTheory.EXISTS_SIMP
120           ORELSEC (EXISTS_OR_CONV
121                    THENC LAND_CONV witness_conv
122                    THENC REWR_CONV t_or)
123           ORELSEC witness_conv);
124in
125  fun set_sat_conv tm =
126    let
127      val () = if not (chatting 4) then ()
128               else Lib.say ("\nset_sat_conv: " ^ term_to_string tm)
129    in
130      sat_conv tm
131      handle e as HOL_ERR _ => Raise e
132    end;
133 end;
134
135local
136  val chr_ss = simpLib.++ (boolSimps.bool_ss, numSimps.REDUCE_ss);
137in
138  val chr_sat_conv =
139    QUANT_CONV normalForms.DNF_CONV
140    THENC SIMP_CONV chr_ss [chr_11, chr_suff, chr_suff1];
141end;
142
143val prefix_sat_conv = ref set_sat_conv;
144
145local
146  fun exists_conv tm =
147    (ONCE_REWRITE_CONV [exists_transition_regexp2na_def]
148     THENC QUANT_CONV EVAL
149     THENC !prefix_sat_conv) tm;
150in
151  val exists_transition_regexp2na_conv = cache_conv 3 4 exists_conv;
152end;
153
154val initial_regexp2na_conv =
155  cache_conv 3 4 (ONCE_REWRITE_CONV [initial_regexp2na] THENC EVAL);
156
157val accept_regexp2na_conv =
158  cache_conv 3 4 (ONCE_REWRITE_CONV [accept_regexp2na] THENC EVAL);
159
160val transition_regexp2na_conv =
161  cache_conv 3 4 (ONCE_REWRITE_CONV [transition_regexp2na] THENC EVAL);
162
163val eval_accepts_conv =
164  cache_conv 2 3 (ONCE_REWRITE_CONV [eval_accepts_def] THENC EVAL);
165
166val eval_transitions_conv =
167  cache_conv 1 3 (ONCE_REWRITE_CONV [eval_transitions_def] THENC EVAL);
168
169local
170  fun hol_rev tm =
171    let val (l,ty) = listSyntax.dest_list tm
172    in listSyntax.mk_list (rev l, ty)
173    end;
174in
175  fun areport_conv tm =
176    let
177      val l = hol_rev (rand (rator tm))
178      val () = if not (chatting 2) then ()
179               else Lib.say ("\nmatch: " ^ term_to_string l)
180    in
181      REWR_CONV areport_def
182    end tm;
183end;
184
185val () = computeLib.add_funs
186  [(* Prefer the cached conversions of
187      initial_regexp2na, accept_regexp2na, transition_regexp2na,
188      eval_accepts, eval_transitions *)
189   matcherTheory.astep_def,
190   matcherTheory.dijkstra_def];
191
192val () = computeLib.add_convs
193  [(initial_regexp2na_tm,           1, initial_regexp2na_conv),
194   (accept_regexp2na_tm,            2, accept_regexp2na_conv),
195   (transition_regexp2na_tm,        4, transition_regexp2na_conv),
196   (eval_accepts_tm,                2, eval_accepts_conv),
197   (eval_transitions_tm,            3, eval_transitions_conv),
198   (exists_transition_regexp2na_tm, 3, exists_transition_regexp2na_conv),
199   (areport_tm,                     2, areport_conv)];
200
201(*---------------------------------------------------------------------------*)
202(* Speed up the evaluation of very long lists.                               *)
203(*---------------------------------------------------------------------------*)
204
205local
206  val dropize =
207    (CONV_RULE o LAND_CONV o ONCE_REWRITE_CONV) [GSYM (CONJUNCT1 drop_def)];
208
209  fun dest_single l =
210    let
211      val (h,t) = listSyntax.dest_cons l
212      val _ = listSyntax.is_nil t orelse raise ERR "dest_single" ""
213    in
214      h
215    end;
216
217  val is_single = can dest_single;
218
219  val reduce = CONV_RULE (LAND_CONV reduceLib.REDUCE_CONV);
220
221  fun loop acc th =
222    let
223      val acc = MATCH_MP head_drop th :: acc
224    in
225      if is_single (snd (dest_eq (concl th))) then
226        CONV_RULE reduceLib.REDUCE_CONV (MATCH_MP length_drop th) :: acc
227      else loop acc (reduce (MATCH_MP tail_drop th))
228    end;
229in
230  fun EVAL_BIGLIST def = let val def = dropize def in loop [def] def end;
231end;
232
233(*---------------------------------------------------------------------------*)
234(* Export a regular expression as a Verilog state machine.                   *)
235(*---------------------------------------------------------------------------*)
236
237val LINE_LENGTH = ref 79;
238
239local fun dup _ 0 l = l | dup x n l = dup x (n - 1) (x :: l);
240in fun chs x n = implode (dup x n []);
241end;
242
243fun DISCH_CONJ [] = I
244  | DISCH_CONJ [x] = DISCH x
245  | DISCH_CONJ (x :: (xs as _ :: _)) =
246  CONV_RULE (REWR_CONV AND_IMP_INTRO) o DISCH x o DISCH_CONJ xs;
247
248fun all_subsets [] = [[]]
249  | all_subsets (x :: xs) =
250  let val y = all_subsets xs
251  in map (cons (x,true)) y @ map (cons (x,false)) y
252  end;
253
254val dest_in = dest_binop ``(IN)`` (ERR "dest_in" "");
255
256fun mk_alph s =
257  let
258    val t = map (rhs o concl o EVAL) s
259    val ts = zip t s
260  in
261    fn x => assoc x ts
262  end;
263
264datatype 'a condition =
265  Leaf of 'a
266| Branch of string * 'a condition * 'a condition;
267
268local
269  val s = ``s : num -> bool``;
270
271  val empty : (term,int) Binarymap.dict = Binarymap.mkDict compare;
272
273  fun member (m : (term,int) Binarymap.dict) t =
274    Option.isSome (Binarymap.peek (m, t));
275
276  fun harvest_terms acc [] = acc
277    | harvest_terms acc (Leaf (tm,_) :: l) = harvest_terms (tm :: acc) l
278    | harvest_terms acc (Branch (_,a,b) :: l) = harvest_terms acc (a :: b :: l);
279
280  fun finalize (set : (term,int) Binarymap.dict) =
281    let
282      fun f (Leaf (tm,th)) = Leaf (Binarymap.find (set,tm), th)
283        | f (Branch (c,a,b)) = Branch (c, f a, f b)
284      fun g acc [] = acc
285        | g acc ((i,t,a,c) :: r) = g ((i, t, a, f c) :: acc) r
286    in
287      g []
288    end;
289
290  fun initial r =
291    let
292      val ty = (hd o snd o dest_type o type_of) r
293      val e = inst [alpha |-> ty] initial_regexp2na_tm
294      val t =
295        mk_comb (e, r)
296        handle HOL_ERR _ =>
297          raise Fail
298            ("export_nfa.initial.mk_comb: " ^
299             " e = " ^ type_to_string (type_of e) ^
300             ", r = " ^ type_to_string (type_of r))
301      val res = rhs (concl (EVAL t))
302    in
303      listSyntax.mk_list ([res], numSyntax.num)
304    end;
305
306  fun accept r s =
307    let
308      val ty = (hd o snd o dest_type o type_of) r
309      val e = inst [alpha |-> ty] eval_accepts_tm
310      val t =
311        list_mk_comb (e, [r, s])
312        handle HOL_ERR _ => raise Fail "export_nfa.accept.list_mk_comb"
313      val th = EVAL t
314      val res = rhs (concl th)
315      val _ =
316        res = T orelse res = F orelse
317        raise ERR "export_nfa.accept" "couldn't reduce eval_accepts"
318    in
319      (res = T, th)
320    end;
321
322  fun simp tm =
323    let val th = CONV_RULE EVAL (ASSUME tm)
324    in CONV_RULE (RAND_CONV (SIMP_CONV boolSimps.bool_ss [th]))
325    end;
326
327  fun mk_condition (m,d,gen) =
328    let
329      fun g bs = GEN s o SPEC s o gen o DISCH_CONJ (rev bs)
330
331      fun f bs th =
332        let val (_,tm) = dest_eq (concl th) in
333          case total (dest_cond o find_term is_cond) tm of
334            NONE => Leaf (tm, g bs th)
335          | SOME (c,_,_) =>
336            let
337              val () = if not (chatting 3) then ()
338                       else chat 3 ("extract_dfa.mk_condition: th =\n" ^
339                                    thm_to_string th ^ "\n")
340              val t = find_term (can d) c
341              val v = d t
342              val s = #Name (dest_thy_const v)
343              val tm = m v
344              val tm' = mk_neg tm
345            in
346              Branch (s, f (tm :: bs) (simp tm th), f (tm' :: bs) (simp tm' th))
347            end
348        end
349    in
350      f []
351    end;
352
353  fun trans alph r s =
354    let
355      val ty = (hd o snd o dest_type o type_of) r
356      val x = genvar ty
357      fun mvar t = pred_setSyntax.mk_in (t,x)
358      fun dvar t =
359        let val (a,b) = dest_in t
360        in if b = x then alph a else raise ERR "term_to_bexp.var" "not a var"
361        end
362      val gen = GEN x
363      val e = inst [alpha |-> ty] eval_transitions_tm
364      val t =
365        list_mk_comb (e, [r, s, x])
366        handle HOL_ERR _ =>
367          raise Fail
368            ("export_nfa.trans.list_mk_comb:" ^
369             " e = " ^ type_to_string (type_of e) ^
370             ", r = " ^ type_to_string (type_of r) ^
371             ", s = " ^ type_to_string (type_of s) ^
372             ", x = " ^ type_to_string (type_of x))
373      val th = EVAL t
374      val () = if not (chatting 3) then ()
375               else chat 3 ("extract_dfa.trans: th =\n"^thm_to_string th^"\n")
376    in
377      mk_condition (mvar,dvar,gen) th
378    end;
379
380  fun export _ _ _ set acc [] = finalize set acc
381    | export alph r trans set acc (tm :: rest) =
382    if member set tm then export alph r trans set acc rest else
383      let
384        val i = Binarymap.numItems set
385        val set = Binarymap.insert (set, tm, i)
386        val a = accept r tm
387        val c = trans alph r tm
388        val rest = harvest_terms rest [c]
389        val acc = (i, tm, a, c) :: acc
390      in
391        export alph r trans set acc rest
392      end;
393in
394  fun extract_dfa alph r =
395    export (mk_alph alph) r trans empty [] [initial r];
396end;
397
398local
399  fun separator s =
400    let
401      val seps = !LINE_LENGTH - 4 - (size s + 2)
402      val m = seps div 2 - 3
403      val n = seps - m
404    in
405      chs #"-" m ^ " " ^ s ^ " " ^ chs #"-" n
406    end;
407
408  fun claim r = PP.pp_to_string (!LINE_LENGTH - 3) pp_term r;
409
410  fun comment s =
411    String.concat
412    (map (fn x => "// " ^ x ^ "\n") (String.fields (fn c => c = #"\n") s));
413in
414  fun header n r = comment (separator n ^ "\n" ^ claim r);
415end;
416
417local
418  fun log2 n =
419    Int.toString (Real.ceil (Math.ln (Real.fromInt n) / Math.ln 2.0) - 1);
420
421  fun width l = "[" ^ log2 (length l) ^ ":0]";
422
423  open PP;
424
425  fun pp_alphs _ _ [] = raise ERR "verilog_dfa" "empty alphabet"
426    | pp_alphs s pp (h :: t) =
427    (add_string pp h;
428     app (fn a => (add_string pp s; add_break pp (1,0); add_string pp a)) t);
429
430  fun pp_condition pp (Leaf (i,_)) =
431    add_string pp ("state = " ^ Int.toString i ^ ";")
432    | pp_condition pp (Branch (c,a,b)) =
433    (begin_block pp CONSISTENT 0;
434     begin_block pp CONSISTENT 2;
435     add_string pp ("if (" ^ c ^ ")");
436     add_break pp (1,0);
437     pp_condition pp a;
438     end_block pp;
439     add_newline pp;
440     begin_block pp CONSISTENT 2;
441     add_string pp "else";
442     add_break pp (1,0);
443     pp_condition pp b;
444     end_block pp;
445     end_block pp);
446
447  fun pp_case pp name (i,_,(a,_),t) =
448    (begin_block pp CONSISTENT 2;
449     add_string pp (Int.toString i ^ ":");
450     add_break pp (1,0);
451     if a then
452       add_string pp
453         ("begin $display (\"" ^ name ^
454          ": property violated!\"); $finish; end")
455     else pp_condition pp t;
456     end_block pp;
457     add_newline pp;
458     add_newline pp)
459
460  fun pp_module pp (name,alph,table) =
461    (begin_block pp CONSISTENT 0;
462
463     begin_block pp INCONSISTENT (size ("module " ^ name ^ " ("));
464     add_string pp ("module " ^ name ^ " (");
465     pp_alphs "," pp (alph (*@ ["state"]*));
466     add_string pp ");";
467     end_block pp;
468     add_newline pp;
469     add_newline pp;
470
471     begin_block pp INCONSISTENT (size "output" + size (width table) + 2);
472     add_string pp ("input" ^ chs #" " (size (width table) + 3));
473     pp_alphs "," pp alph;
474     add_string pp ";";
475     end_block pp;
476     add_newline pp;
477
478(*
479     add_string pp ("output " ^ width table ^ " state;");
480     add_newline pp;
481*)
482     add_string pp ("reg    " ^ width table ^ " state;");
483     add_newline pp;
484     add_newline pp;
485
486     add_string pp "initial state = 0;";
487     add_newline pp;
488     add_newline pp;
489
490     begin_block pp INCONSISTENT (size "always @ (");
491     add_string pp "always @ (";
492     pp_alphs " or" pp alph;
493     add_string pp ")";
494     end_block pp;
495     add_newline pp;
496     begin_block pp INCONSISTENT 2;
497     add_string pp "begin";
498     add_newline pp;
499     add_string pp ("$display (\"" ^ name ^ ": state = %0d\", state);");
500     add_newline pp;
501     begin_block pp INCONSISTENT 2;
502     add_string pp "case (state)";
503     add_newline pp;
504     app (pp_case pp name) table;
505     add_string pp
506       ("default: begin $display (\"" ^ name ^
507        ": unknown state\"); $finish; end");
508     end_block pp;
509     add_newline pp;
510     add_string pp "endcase";
511     end_block pp;
512     add_newline pp;
513     add_string pp "end";
514     add_newline pp;
515     add_newline pp;
516
517     add_string pp "endmodule";
518     add_newline pp;
519
520     end_block pp);
521in
522  fun module n a r =
523    pp_to_string (!LINE_LENGTH) pp_module
524      (n, map (#Name o dest_thy_const) a, extract_dfa a r);
525end;
526
527fun verilog_dfa {name = n, alphabet = a, regexp = r} =
528  header n r ^ "\n" ^ module n a r;
529
530end