1(* -*-sml-*- *) 2(*---------------------------------------------------------------------------*) 3(* Regular expressions and a regexp matcher. *) 4(* Originated from Konrad Slind, tweaked by MJCG for Accellera PSL SEREs *) 5(* An automata-based matcher added by Joe Hurd *) 6(*---------------------------------------------------------------------------*) 7 8app load ["bossLib", "metisLib", "stringLib", "regexpLib"]; 9 10quietdec := true; 11open HolKernel Parse boolLib bossLib; 12quietdec := false; 13 14(****************************************************************************** 15* Set the trace level of the regular expression library: 16* 0: silent 17* 1: 1 character (either - or +) for each list element processed 18* 2: matches as they are discovered 19* 3: transitions as they are calculated 20* 4: internal state of the automata 21******************************************************************************) 22set_trace "regexpTools" 2; 23 24(*---------------------------------------------------------------------------*) 25(* Examples of the matchers. *) 26(*---------------------------------------------------------------------------*) 27 28fun CHECK r s = Count.apply EVAL 29 (Term `amatch ^r (EXPLODE ^(stringSyntax.fromMLstring s))`); 30 31fun QCHECK q r s = 32 let 33 val th = CHECK r s 34 in 35 if rhs (concl th) = q then th else 36 raise (print ("\n*** FAILED ***\nShould have " ^ term_to_string q ^ 37 ", actually got\n" ^ thm_to_string th ^ "\n"); 38 raise Fail "QCHECK: failed a test") 39 end; 40 41val TCHECK = QCHECK T; 42val FCHECK = QCHECK F; 43 44val Alph_123 = fst(listSyntax.dest_list(rhs(concl(EVAL(Term`EXPLODE"123"`))))); 45val Alph_ab = fst(listSyntax.dest_list(rhs(concl(EVAL(Term`EXPLODE"ab"`))))); 46val Any = ``Dot : char regexp``; 47val Epsilon = ``One : char regexp``; 48val One = rhs(concl(EVAL(Term`Atom((=) (HD(EXPLODE "1")))`))); 49val Two = rhs(concl(EVAL(Term`Atom((=) (HD(EXPLODE "2")))`))); 50val a = rhs(concl(EVAL(Term`Atom((=) (HD(EXPLODE "a")))`))); 51val b = rhs(concl(EVAL(Term`Atom((=) (HD(EXPLODE "b")))`))); 52val c = rhs(concl(EVAL(Term`Atom((=) (HD(EXPLODE "c")))`))); 53 54val r0 = Term `^One # ^Two`; 55val r1 = Term `Repeat (^One # ^Two)`; 56val r2 = Term `Repeat ^Any # ^One`; 57val r3 = Term `^r2 # ^r1`; 58 59TCHECK r0 "12"; 60TCHECK r1 "12"; 61TCHECK r1 "1212"; 62TCHECK r1 "121212121212121212121212"; 63FCHECK r1 "12123"; 64FCHECK r2 ""; 65TCHECK r2 "1"; 66TCHECK r2 "0001"; 67FCHECK r2 "0002"; 68TCHECK r3 "00011212"; 69FCHECK r3 "00011213"; 70TCHECK (Term`Repeat(Repeat ^Any)`) ""; 71TCHECK (Term`Repeat(Repeat ^Any)`) "0"; 72TCHECK (Term`Repeat(Repeat ^Any)`) "0123"; 73TCHECK (Term`Repeat (^Any # Repeat ^Any)`) "0"; 74TCHECK (Term`Repeat (^Any # Repeat ^Any)`) "01"; 75TCHECK (Term`Repeat (^Any # Repeat ^Any)`) "012"; 76TCHECK (Term`^a # Repeat(^a | ^b) # Repeat(^b # ^a)`) "abba"; 77 78(* At most 2 a's. Alphabet = {a,b} *) 79val AtMostTwo_a = Term `Repeat ^b 80 | Repeat ^b # (^a | ^a # Repeat ^b # ^a) # Repeat ^b`; 81 82TCHECK AtMostTwo_a ""; 83TCHECK AtMostTwo_a "b"; 84TCHECK AtMostTwo_a "a"; 85TCHECK AtMostTwo_a "aa"; 86TCHECK AtMostTwo_a "ab"; 87TCHECK AtMostTwo_a "ba"; 88TCHECK AtMostTwo_a "bb"; 89TCHECK AtMostTwo_a "abbbbabbbb"; 90TCHECK AtMostTwo_a "bbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; 91FCHECK AtMostTwo_a "abbbbabbbab"; 92 93(* Exactly 2 a's. Alphabet = {a,b} *) 94val ExactlyTwo_a = Term `Repeat ^b # ^a # Repeat ^b # ^a # Repeat ^b`; 95 96FCHECK ExactlyTwo_a ""; 97FCHECK ExactlyTwo_a "b"; 98FCHECK ExactlyTwo_a "a"; 99TCHECK ExactlyTwo_a "aa"; 100FCHECK ExactlyTwo_a "ab"; 101FCHECK ExactlyTwo_a "ba"; 102FCHECK ExactlyTwo_a "bb"; 103TCHECK ExactlyTwo_a "abbbbabbbb"; 104TCHECK ExactlyTwo_a "bbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbab"; 105FCHECK ExactlyTwo_a "abbbbabbbab"; 106 107(* All strings of length 0-3 *) 108val UpTo3 = Term `^Epsilon | ^Any | ^Any#^Any | ^Any#^Any#^Any`; 109 110TCHECK UpTo3 ""; 111TCHECK UpTo3 "b"; 112TCHECK UpTo3 "a"; 113TCHECK UpTo3 "aa"; 114FCHECK UpTo3 "abbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; 115 116(* All strings with no occurrences of aa or bb *) 117val NoRepeats = Term `^Any | Repeat (^a # ^b) | Repeat (^b # ^a)`; 118 119TCHECK NoRepeats ""; 120TCHECK NoRepeats "a"; 121TCHECK NoRepeats "b"; 122FCHECK NoRepeats "aa"; 123TCHECK NoRepeats "ab"; 124TCHECK NoRepeats "ba"; 125FCHECK NoRepeats "bb"; 126TCHECK NoRepeats "ababababababababababababababababababababababababababab"; 127FCHECK NoRepeats "abababababababababababbababababababababababababababab"; 128 129(* Some tests of the Prefix operator *) 130val Prefix12 = Term `Prefix ^r0`; 131val PrefixExactlyTwo_a = 132 Term `Prefix (Repeat ^b # ^a # Repeat ^b # ^a # Repeat ^b)`; 133 134TCHECK Prefix12 "1"; 135FCHECK Prefix12 "11"; 136TCHECK PrefixExactlyTwo_a ""; 137 138(* Some tests of the export functions *) 139val Prefix12_dfa = try (regexpLib.export_dfa Alph_123) Prefix12; 140 141(*---------------------------------------------------------------------------*) 142