1(* -*-sml-*- *)
2(*---------------------------------------------------------------------------*)
3(* Regular expressions and a regexp matcher.                                 *)
4(* Originated from Konrad Slind, tweaked by MJCG for Accellera PSL SEREs     *)
5(* An automata-based matcher added by Joe Hurd                               *)
6(*---------------------------------------------------------------------------*)
7
8app load ["bossLib", "metisLib", "stringLib", "regexpLib"];
9
10quietdec := true;
11open HolKernel Parse boolLib bossLib;
12quietdec := false;
13
14(******************************************************************************
15* Set the trace level of the regular expression library:      
16* 0: silent
17* 1: 1 character (either - or +) for each list element processed
18* 2: matches as they are discovered
19* 3: transitions as they are calculated
20* 4: internal state of the automata
21******************************************************************************)
22set_trace "regexpTools" 2;
23
24(*---------------------------------------------------------------------------*)
25(* Examples of the matchers.                                                 *)
26(*---------------------------------------------------------------------------*)
27
28fun CHECK r s = Count.apply EVAL 
29                  (Term `amatch ^r (EXPLODE ^(stringSyntax.fromMLstring s))`);
30
31fun QCHECK q r s =
32  let
33    val th = CHECK r s
34  in
35    if rhs (concl th) = q then th else
36      raise (print ("\n*** FAILED ***\nShould have " ^ term_to_string q ^
37                    ", actually got\n" ^ thm_to_string th ^ "\n");
38             raise Fail "QCHECK: failed a test")
39  end;
40
41val TCHECK = QCHECK T;
42val FCHECK = QCHECK F;
43
44val Alph_123 = fst(listSyntax.dest_list(rhs(concl(EVAL(Term`EXPLODE"123"`)))));
45val Alph_ab = fst(listSyntax.dest_list(rhs(concl(EVAL(Term`EXPLODE"ab"`)))));
46val Any = ``Dot : char regexp``;
47val Epsilon = ``One : char regexp``;
48val One = rhs(concl(EVAL(Term`Atom((=) (HD(EXPLODE "1")))`)));
49val Two = rhs(concl(EVAL(Term`Atom((=) (HD(EXPLODE "2")))`)));
50val a = rhs(concl(EVAL(Term`Atom((=) (HD(EXPLODE "a")))`)));
51val b = rhs(concl(EVAL(Term`Atom((=) (HD(EXPLODE "b")))`)));
52val c = rhs(concl(EVAL(Term`Atom((=) (HD(EXPLODE "c")))`)));
53
54val r0 = Term `^One # ^Two`;
55val r1 = Term `Repeat (^One # ^Two)`;
56val r2 = Term `Repeat ^Any # ^One`;
57val r3 = Term `^r2 # ^r1`;
58
59TCHECK r0 "12";
60TCHECK r1 "12";
61TCHECK r1 "1212";
62TCHECK r1 "121212121212121212121212";
63FCHECK r1 "12123";
64FCHECK r2 "";
65TCHECK r2 "1";
66TCHECK r2 "0001";
67FCHECK r2 "0002";
68TCHECK r3 "00011212";
69FCHECK r3 "00011213";
70TCHECK (Term`Repeat(Repeat ^Any)`) "";
71TCHECK (Term`Repeat(Repeat ^Any)`) "0";
72TCHECK (Term`Repeat(Repeat ^Any)`) "0123";
73TCHECK (Term`Repeat (^Any # Repeat ^Any)`) "0";
74TCHECK (Term`Repeat (^Any # Repeat ^Any)`) "01";
75TCHECK (Term`Repeat (^Any # Repeat ^Any)`) "012";
76TCHECK (Term`^a # Repeat(^a | ^b) # Repeat(^b # ^a)`) "abba";
77
78(* At most 2 a's. Alphabet = {a,b} *)
79val AtMostTwo_a = Term `Repeat ^b 
80                     |  Repeat ^b # (^a | ^a # Repeat ^b # ^a) # Repeat ^b`;
81
82TCHECK AtMostTwo_a "";
83TCHECK AtMostTwo_a "b";
84TCHECK AtMostTwo_a "a";
85TCHECK AtMostTwo_a "aa";
86TCHECK AtMostTwo_a "ab";
87TCHECK AtMostTwo_a "ba";
88TCHECK AtMostTwo_a "bb";
89TCHECK AtMostTwo_a "abbbbabbbb";
90TCHECK AtMostTwo_a "bbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
91FCHECK AtMostTwo_a "abbbbabbbab";
92
93(* Exactly 2 a's. Alphabet = {a,b} *)
94val ExactlyTwo_a = Term `Repeat ^b # ^a # Repeat ^b # ^a # Repeat ^b`;
95
96FCHECK ExactlyTwo_a "";
97FCHECK ExactlyTwo_a "b";
98FCHECK ExactlyTwo_a "a";
99TCHECK ExactlyTwo_a "aa";
100FCHECK ExactlyTwo_a "ab";
101FCHECK ExactlyTwo_a "ba";
102FCHECK ExactlyTwo_a "bb";
103TCHECK ExactlyTwo_a "abbbbabbbb";
104TCHECK ExactlyTwo_a "bbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbab";
105FCHECK ExactlyTwo_a "abbbbabbbab";
106
107(* All strings of length 0-3 *)
108val UpTo3 = Term `^Epsilon | ^Any | ^Any#^Any | ^Any#^Any#^Any`;
109
110TCHECK UpTo3 "";
111TCHECK UpTo3 "b";
112TCHECK UpTo3 "a";
113TCHECK UpTo3 "aa";
114FCHECK UpTo3 "abbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
115
116(* All strings with no occurrences of aa or bb *)
117val NoRepeats = Term `^Any | Repeat (^a # ^b) | Repeat (^b # ^a)`;
118
119TCHECK NoRepeats "";
120TCHECK NoRepeats "a";
121TCHECK NoRepeats "b";
122FCHECK NoRepeats "aa";
123TCHECK NoRepeats "ab";
124TCHECK NoRepeats "ba";
125FCHECK NoRepeats "bb";
126TCHECK NoRepeats "ababababababababababababababababababababababababababab";
127FCHECK NoRepeats "abababababababababababbababababababababababababababab";
128
129(* Some tests of the Prefix operator *)
130val Prefix12 = Term `Prefix ^r0`;
131val PrefixExactlyTwo_a =
132  Term `Prefix (Repeat ^b # ^a # Repeat ^b # ^a # Repeat ^b)`;
133
134TCHECK Prefix12 "1";
135FCHECK Prefix12 "11";
136TCHECK PrefixExactlyTwo_a "";
137
138(* Some tests of the export functions *)
139val Prefix12_dfa = try (regexpLib.export_dfa Alph_123) Prefix12;
140
141(*---------------------------------------------------------------------------*)
142