1(*===========================================================================*) 2(* Regular expression matcher *) 3(* Author: Scott Owens *) 4(*===========================================================================*) 5 6(*---------------------------------------------------------------------------*) 7(* SML code *) 8(* *) 9(* datatype ''a re *) 10(* = Epsilon *) 11(* | Charset of ''a list *) 12(* | Or of ''a re * ''a re *) 13(* | Then of ''a re * ''a re *) 14(* | Repeat of ''a re; *) 15(* *) 16(* Match: ''a re list -> ''a list -> ''a re list option -> bool *) 17(* *) 18(* fun Match [] [] _ = true *) 19(* | Match [] _ _ = false *) 20(* | Match (Epsilon::t) w s = Match t w s *) 21(* | Match (Charset c::t) [] s = false *) 22(* | Match (Charset c::t) (d::w) s = c d andalso Match t w NONE *) 23(* | Match (Or(r1,r2)::t) w s = Match (r1::t) w s orelse Match (r2::t) w s *) 24(* | Match (Then(r1,r2)::t) w s = Match (r1::r2::t) w s *) 25(* | Match (Repeat r::t) w s = *) 26(* let val R = SOME (Repeat r::t) *) 27(* in if s = R then false *) 28(* else Match t w s orelse Match (r::Repeat r::t) w R *) 29(* end *) 30(* *) 31(*---------------------------------------------------------------------------*) 32 33 34(* ------------------------------------------------------------------------- *) 35(* HOL Preliminaries *) 36(* ------------------------------------------------------------------------- *) 37 38quietdec := true; 39app load ["stringLib"]; 40open pairTheory arithmeticTheory listTheory optionTheory; 41quietdec := false; 42 43val thm_counter = Count.mk_meter(); 44 45(*---------------------------------------------------------------------------*) 46(* Change free variable names to desired ones. Takes a list of (old,new) *) 47(* pairs (strings). *) 48(*---------------------------------------------------------------------------*) 49 50fun RENAME_FREES_TAC [] = ALL_TAC 51 | RENAME_FREES_TAC ((old,new)::t) = 52 Q.SPEC_TAC([QUOTE old],[QUOTE new]) THEN GEN_TAC THEN RENAME_FREES_TAC t; 53 54(*---------------------------------------------------------------------------*) 55(* Miscellaneous lemma; should already exist in HOL *) 56(*---------------------------------------------------------------------------*) 57 58val MEM_EXISTS_LEM = Q.prove 59(`!x l. MEM x l = EXISTS (\y. y = x) l`, 60 Induct_on `l` THEN EVAL_TAC THEN METIS_TAC []); 61 62val _ = new_theory "regexp"; 63 64(*--------------------------------------------------------------------------*) 65(* Datatype of regular expressions. Note that by having Charset take a *) 66(* characteristic function means that regexp is not computably testable for *) 67(* equality. Alternative: have Charset be a list or a finite map ... *) 68(*--------------------------------------------------------------------------*) 69 70Hol_datatype 71 `regexp = Epsilon (* Empty string *) 72 | Charset of 'a -> bool (* Character set *) 73 | Or of regexp => regexp (* Union *) 74 | Then of regexp => regexp (* Concatenation *) 75 | Repeat of regexp`; (* Iterated concat, >= 0 *) 76 77(*---------------------------------------------------------------------------*) 78(* Parser fiddling to get | and # as infixes - we have to first get rid *) 79(* of their pre-defined behaviour. *) 80(*---------------------------------------------------------------------------*) 81 82val _ = overload_on ("+", Term`$Or`); 83val _ = overload_on ("#", Term`$Then`); 84 85val _ = set_fixity "+" (Infixr 501); 86val _ = set_fixity "#" (Infixr 601); 87 88val regexp_cases = TypeBase.nchotomy_of ``:'a regexp``; 89 90(*---------------------------------------------------------------------------*) 91(* Semantics of regular expressions. sem r w means that word w (represented *) 92(* as a list) is in the language of regular expression r. *) 93(*---------------------------------------------------------------------------*) 94 95val sem_def = 96 Define 97 `(sem Epsilon w = (w = [])) /\ 98 (sem (Charset C) w = (?x. (w = [x]) /\ C x)) /\ 99 (sem (r1 + r2) w = sem r1 w \/ sem r2 w) /\ 100 (sem (r1 # r2) w = ?w1 w2. (w = w1 ++ w2) /\ sem r1 w1 /\ sem r2 w2) /\ 101 (sem (Repeat r) w = ?wlist. (w = FLAT wlist) /\ EVERY (sem r) wlist)`; 102 103(*---------------------------------------------------------------------------*) 104(* Some basic equivalences for regular expressions. *) 105(*---------------------------------------------------------------------------*) 106 107val regexp_repeat_thm = Q.prove 108(`!r w.sem (Repeat r) w = sem (Epsilon + (r # (Repeat r))) w`, 109 RW_TAC std_ss [] THEN EQ_TAC THEN RW_TAC list_ss [sem_def, FLAT] 110 THENL [Cases_on `wlist` THEN RW_TAC list_ss [FLAT], ALL_TAC, ALL_TAC] 111 THEN METIS_TAC [FLAT,EVERY_DEF]); 112 113val repeat_to_ncat_thm = Q.prove 114(`!r w.sem (Repeat r) w ==> ?n. sem (FUNPOW ($# r) n Epsilon) w`, 115 RW_TAC list_ss [sem_def, FLAT, FUNPOW_SUC] 116 THEN Q.EXISTS_TAC `LENGTH wlist` 117 THEN Induct_on `wlist` 118 THEN RW_TAC list_ss [sem_def, FLAT, FUNPOW_SUC] 119 THEN EVAL_TAC THEN METIS_TAC []); 120 121val ncat_to_repeat_thm = Q.prove 122(`!r w n. sem (FUNPOW ($# r) n Epsilon) w ==> sem (Repeat r) w`, 123 Induct_on `n` THENL [ALL_TAC, POP_ASSUM MP_TAC] THEN EVAL_TAC 124 THEN RW_TAC list_ss [sem_def,FUNPOW_SUC] THENL 125 [METIS_TAC [FLAT, EVERY_DEF], 126 RES_TAC THEN Q.EXISTS_TAC `w1::wlist` THEN METIS_TAC [EVERY_DEF,FLAT]]); 127 128val repeat_equals_ncat_thm = Q.prove 129(`!r w. sem (Repeat r) w = ?n. sem (FUNPOW ($# r) n Epsilon) w`, 130 METIS_TAC [ncat_to_repeat_thm, repeat_to_ncat_thm]); 131 132 133(* ------------------------------------------------------------------------- *) 134(* Regular expression matching algorithm *) 135(* ------------------------------------------------------------------------- *) 136 137val match_defn = 138 Hol_defn 139 "match" 140 `(match [] [] _ = T) 141/\ (match [] _ _ = F) 142/\ (match (Epsilon::t) w s = match t w s) 143/\ (match (Charset C::t) [] s = F) 144/\ (match (Charset C::t) (d::w) s = C d /\ match t w NONE) 145/\ (match ((r1 + r2)::t) w s = match (r1::t) w s \/ match (r2::t) w s) 146/\ (match ((r1 # r2)::t) w s = match (r1::r2::t) w s) 147/\ (match (Repeat r::t) w s = 148 if s = SOME (Repeat r::t) then 149 F 150 else 151 match t w s \/ match (r::Repeat r::t) w (SOME (Repeat r::t)))`; 152 153(*---------------------------------------------------------------------------*) 154(* Termination proof *) 155(*---------------------------------------------------------------------------*) 156 157(*---------------------------------------------------------------------------*) 158(* Size of a regular expression, needed because the default metric gives *) 159(* Epsilon size 0 which won't work for the matcher's termination proof. *) 160(*---------------------------------------------------------------------------*) 161 162val my_regexp_size_def = 163 Define 164 `(my_regexp_size Epsilon = 1) /\ 165 (my_regexp_size (Charset _) = 1) /\ 166 (my_regexp_size (r1 + r2) = 1 + my_regexp_size r1 + my_regexp_size r2) /\ 167 (my_regexp_size (r1 # r2) = 1 + my_regexp_size r1 + my_regexp_size r2) /\ 168 (my_regexp_size (Repeat r) = 1 + my_regexp_size r)`; 169 170(*---------------------------------------------------------------------------*) 171(* Star height of a regular expression *) 172(*---------------------------------------------------------------------------*) 173 174val starHeight_def = 175 Define 176 `(starHeight Epsilon = 0) /\ 177 (starHeight (Charset _) = 0) /\ 178 (starHeight (r1 + r2) = MAX (starHeight r1) (starHeight r2)) /\ 179 (starHeight (r1 # r2) = MAX (starHeight r1) (starHeight r2)) /\ 180 (starHeight (Repeat r) = 1 + starHeight r)`; 181 182val front_starHeight_def = 183 Define 184 `(front_starHeight [] sl = 0) /\ 185 (front_starHeight (h::t) sl = 186 if (sl = SOME (h::t)) /\ ?r. h = Repeat r 187 then 0 188 else MAX (starHeight h) (front_starHeight t sl))`; 189 190(*---------------------------------------------------------------------------*) 191(* SUM (MAP my_regexp_size res) doesn't count the conses in the list like *) 192(* the builtin list measure does. This is important for termination proof. *) 193(*---------------------------------------------------------------------------*) 194 195val term_meas_def = 196 Define 197 `term_meas (res, str, stop) = 198 (LENGTH str, 199 front_starHeight res stop, 200 SUM (MAP my_regexp_size res))`; 201 202val (match_def, match_ind) = Defn.tprove 203(match_defn, 204 WF_REL_TAC `inv_image ($< LEX $< LEX $<) term_meas` THEN 205 RW_TAC list_ss [term_meas_def, my_regexp_size_def] THEN 206 PURE_ONCE_REWRITE_TAC [GSYM arithmeticTheory.LESS_OR_EQ] THEN 207 RW_TAC list_ss [front_starHeight_def, starHeight_def]); 208 209 210(* ------------------------------------------------------------------------- *) 211(* Correctness Proof *) 212(* ------------------------------------------------------------------------- *) 213 214(*---------------------------------------------------------------------------*) 215(* Match implies the semantics *) 216(*---------------------------------------------------------------------------*) 217 218val match_implies_sem = Q.prove 219(`!rl w s. match rl w s ==> sem (FOLDR $# Epsilon rl) w`, 220 recInduct match_ind THEN RW_TAC list_ss [match_def, sem_def] THENL 221 [METIS_TAC [APPEND], 222 METIS_TAC [], 223 METIS_TAC [], 224 FULL_SIMP_TAC list_ss [] THEN METIS_TAC [], 225 MAP_EVERY Q.EXISTS_TAC [`[]`, `w`] 226 THEN RW_TAC list_ss [] THEN METIS_TAC [FLAT,EVERY_DEF], 227 FULL_SIMP_TAC list_ss [] 228 THEN MAP_EVERY Q.EXISTS_TAC [`w1 ++ FLAT wlist`, `w2'`] 229 THEN RW_TAC list_ss [] 230 THEN METIS_TAC [FLAT,EVERY_DEF]]); 231 232(*---------------------------------------------------------------------------*) 233(* Semantics imply match *) 234(*---------------------------------------------------------------------------*) 235 236(*---------------------------------------------------------------------------*) 237(* A match sequence is a witness to a path through the match algorithm that *) 238(* ends with T. *) 239(*---------------------------------------------------------------------------*) 240 241val m_def = 242 Define 243 `(m (Epsilon::t,w,s) x = (x = (t,w,s))) /\ 244 (m (Charset C::t,d::w,s) x = (x = (t,w,NONE)) /\ C d) /\ 245 (m ((r1 + r2)::t, w, s) x = (x = (r1::t,w,s)) \/ (x = (r2::t,w,s))) /\ 246 (m ((r1 # r2)::t, w, s) x = (x = (r1::r2::t,w,s))) /\ 247 (m (Repeat r::t, w,s) x = (x = (t, w, s)) \/ 248 (x = (r::Repeat r::t, w, SOME(Repeat r::t))))/\ 249 (m _ _ = F)`; 250 251 252val match_seq_def = 253 Define 254 `(match_seq [] = T) /\ 255 (match_seq [a] = (FST a = []) /\ (FST (SND a) = [])) /\ 256 (match_seq (a::b::rest) = m a b /\ match_seq (b::rest))`; 257 258(*---------------------------------------------------------------------------*) 259(* Induction and case analysis theorems for match sequences *) 260(*---------------------------------------------------------------------------*) 261 262val m_ind = fetch "-" "m_ind"; 263val ms_ind = fetch "-" "match_seq_ind"; 264 265val m_ind_thm = Q.prove 266(`!P:'a regexp list # 'a list # 'a regexp list option -> 267 'a regexp list # 'a list # 'a regexp list option -> bool. 268 (!t w s x. P (Epsilon::t,w,s) x) /\ 269 (!c t d w s x. P (Charset c::t,d::w,s) x) /\ 270 (!r1 r2 t w s x. P ((r1 + r2)::t,w,s) x) /\ 271 (!r1 r2 t w s x. P ((r1 # r2)::t,w,s) x) /\ 272 (!r t w s x. P (Repeat r::t,w,s) x) /\ 273 (!c t s x. P (Charset c::t,[],s) x) /\ 274 (!w s. P ([],w) s) ==> 275 !v v1 v2 v3 v4 v5. P (v,v1,v2) (v3,v4,v5)`, 276 METIS_TAC [ABS_PAIR_THM,m_ind]); 277 278val m_thm = SIMP_RULE list_ss [] (Q.prove 279(`!rl w s rl' w' s' r l. 280 ~(s = SOME (Repeat r::l)) ==> 281 (s' = SOME (Repeat r::l)) ==> 282 m (rl, w, s) (rl', w', s') ==> 283 ((rl = Repeat r::l) /\ (w = w'))`, 284 recInduct m_ind_thm THEN RW_TAC list_ss [m_def])); 285 286val m_thm2 = Q.prove 287(`!rl w s rl' w' s'. ~(w = w') ==> m (rl, w, s) (rl',w',s') ==> (s' = NONE)`, 288 recInduct m_ind_thm THEN RW_TAC list_ss [m_def]); 289 290val cases_lemma = Q.prove 291(`!a: 'a regexp list # 'a list # 'a regexp list option. 292 (?x. a = ([],[],x)) \/ (?h t x. a = ([],h::t,x)) \/ 293 (?t w s. a = (Epsilon::t,w,s)) \/ 294 (?c t w s. a = (Charset c::t,[],s)) \/ 295 (?c d t w s. a = (Charset c::t,d::w,s)) \/ 296 (?r1 r2 t w s. a = ((r1+r2)::t,w,s)) \/ 297 (?r1 r2 t w s. a = ((r1#r2)::t,w,s)) \/ 298 (?r t w s. a = (Repeat r::t,w,s))`, 299 METIS_TAC [list_CASES,ABS_PAIR_THM,regexp_cases]); 300 301val ms_ind_thm = Q.prove 302(`!P:('a regexp list # 'a list # 'a regexp list option) list -> bool. 303 P [] /\ 304 (!x. P [([],[],x)]) /\ 305 (!r t w s. P [(Repeat r::t,w,s)]) /\ 306 (!r1 r2 t w s. P [((r1 # r2)::t,w,s)]) /\ 307 (!r1 r2 t w s. P [((r1 + r2)::t,w,s)]) /\ 308 (!c d t w s. P [(Charset c::t,d::w,s)]) /\ 309 (!c t s. P [(Charset c::t,[],s)]) /\ 310 (!t w s. P [(Epsilon::t,w,s)]) /\ 311 (!h t s. P [([],h::t,s)]) /\ 312 (!t w s t' w' s' rst. 313 P ((t',w',s')::rst) ==> P((Epsilon::t,w,s)::(t',w',s')::rst)) /\ 314 (!c t d w s t' w' s' rst. 315 P ((t',w',s')::rst) ==> P((Charset c::t,d::w,s)::(t',w',s')::rst)) /\ 316 (!r1 r2 t w s t' w' s' rst. 317 P ((t',w',s')::rst) ==> P(((r1 + r2)::t,w,s)::(t',w',s')::rst)) /\ 318 (!r1 r2 t w s t' w' s' rst. 319 P ((t',w',s')::rst) ==> P(((r1 # r2)::t,w,s)::(t',w',s')::rst)) /\ 320 (!r t w s t' w' s' rst. 321 P ((t',w',s')::rst) ==> P((Repeat r::t,w,s)::(t',w',s')::rst)) /\ 322 (!c t s h1 t1. P ((Charset c::t,[],s)::h1::t1)) /\ 323 (!c w s h1 t1. P (([],c::w,s)::h1::t1)) /\ 324 (!s h1 t1. P (([],[],s)::h1::t1)) 325 ==> 326 !v. P v`, 327 GEN_TAC THEN STRIP_TAC THEN MATCH_MP_TAC ms_ind THEN 328 METIS_TAC [ABS_PAIR_THM, cases_lemma]); 329 330val match_seq_down_closed = Q.prove 331(`!L1 L2. match_seq (L1 ++ L2) ==> match_seq L2`, 332 Induct THEN RW_TAC list_ss [match_seq_def] 333 THEN Cases_on `L1++L2` 334 THEN FULL_SIMP_TAC list_ss [match_seq_def]); 335 336 337(*---------------------------------------------------------------------------*) 338(* Concatenation of match sequences requires the "stopper" to be changed. *) 339(* change_stop_on_prefix sets the stop argument in the first element in a *) 340(* match sequence to the specified value. This requires changing the stop *) 341(* argument in subsequent entries in the match sequence until a entry is *) 342(* reached that sets a new stop argument instead of just propogating the *) 343(* previous one. *) 344(*---------------------------------------------------------------------------*) 345 346val change_stop_on_prefix_def = 347 Define 348 `(change_stop_on_prefix ([] : ('a regexp list # 'a list # 349 'a regexp list option) list) ns = []) /\ 350 (change_stop_on_prefix ((Repeat r::t,w,s)::b::c) ns = 351 if b = (r::Repeat r::t, w, SOME (Repeat r::t)) 352 then(Repeat r::t,w,ns)::b::c 353 else (Repeat r::t,w,ns)::change_stop_on_prefix (b::c) ns) /\ 354 (change_stop_on_prefix ((Charset c1::t,w,s)::c) ns 355 = (Charset c1::t,w,ns)::c) /\ 356 (change_stop_on_prefix ((rl,w,s)::c) ns 357 = (rl,w,ns)::change_stop_on_prefix c ns)`; 358 359val csp_def = change_stop_on_prefix_def; 360val csp_ind = fetch "-" "change_stop_on_prefix_ind"; 361 362val csp_step_lem = Q.prove 363(`!alist stop rl w s x. 364 (alist = ((rl,w,s)::x)) ==> 365 ?t. change_stop_on_prefix alist stop = (rl,w,stop)::t`, 366 recInduct csp_ind THEN RW_TAC list_ss [csp_def]); 367 368val csp_step = SIMP_RULE list_ss [] csp_step_lem; 369 370val change_stop_on_prefix_thm = Q.prove 371(`!L stop. match_seq L ==> match_seq (change_stop_on_prefix L stop)`, 372 recInduct ms_ind_thm THEN RW_TAC list_ss [match_seq_def,m_def,csp_def] THEN 373 METIS_TAC [match_seq_def, m_def, csp_step]); 374 375val LENGTH_CSP = Q.prove 376(`!l s. LENGTH (change_stop_on_prefix l s) = LENGTH l`, 377 recInduct csp_ind THEN RW_TAC list_ss [csp_def]); 378 379(*---------------------------------------------------------------------------*) 380(* If match sequences m1 and m2 represent matching rl1 against w1 and rl2 *) 381(* against w2 respectively, then the composing m1 and m2 should represent *) 382(* matching rl1++rl2 against w1++w2. Thus the first element of m2 *) 383(* (rl2, w2, s2) should be appended to the end of all the elements in m1, *) 384(* and s2 should be changed to the last stop argument of m1. Then the *) 385(* sequences can be joined. *) 386(*---------------------------------------------------------------------------*) 387 388val compose_m_seq_def = 389 Define 390 `(compose_m_seq [] x2 = x2) /\ 391 (compose_m_seq x1 [] = x1) /\ 392 (compose_m_seq x1 ((r1,w1,s1)::x2) = 393 let x1 = MAP (\(r,w,s). (r++r1, w++w1, OPTION_MAP (\e. e++r1) s)) x1 in 394 let x2 = change_stop_on_prefix ((r1,w1,s1)::x2) (SND (SND (LAST x1))) 395 in 396 x1 ++ TL x2)`; 397 398val compose_m_seq_null = Q.prove 399(`(!x. compose_m_seq [] x = x) /\ (!x. compose_m_seq x [] = x)`, 400 CONJ_TAC THEN Cases THEN RW_TAC list_ss [compose_m_seq_def] THEN 401 Q.ID_SPEC_TAC `h` THEN SIMP_TAC list_ss [FORALL_PROD,compose_m_seq_def]); 402 403val lem = Q.prove 404(`!mseq r w s r1 w1 s1 s2 x. 405 (mseq = ((r1,w1,s1)::x)) ==> 406 match_seq mseq ==> 407 m (r,w,s) (r1,w1, s2) ==> 408 match_seq ((r,w,s)::change_stop_on_prefix ((r1,w1,s1)::x) s2)`, 409 recInduct ms_ind_thm 410 THEN NTAC 2 (RW_TAC list_ss 411 [csp_step, change_stop_on_prefix_def, match_seq_def, m_def]) 412 THEN METIS_TAC [CONS_ACYCLIC,list_CASES, CONS_11]); 413 414val compose_m_seq_thm = Q.prove 415(`!x1 x2. match_seq x1 ==> match_seq x2 ==> match_seq (compose_m_seq x1 x2)`, 416 REPEAT STRIP_TAC 417 THEN Induct_on `x1` 418 THEN RW_TAC list_ss [] 419 THEN `(?rl w s. h = (rl,w,s)) /\ 420 ((x1 = []) \/ ?g t1. x1 = g::t1) /\ 421 ((x2 = []) \/ ?rl' w' s' t2 . x2 = (rl',w',s')::t2)` 422 by METIS_TAC [list_CASES,ABS_PAIR_THM] 423 THEN RW_TAC list_ss [compose_m_seq_null] THENL 424 [(*Base Case*) 425 `(rl' = []) \/ ?h t. rl' = h::t` by METIS_TAC [list_CASES] 426 THEN `(t2 = []) \/ ?a z. t2 = a::z` by METIS_TAC [list_CASES] 427 THEN FULL_SIMP_TAC list_ss [match_seq_def] 428 THEN `(h = Epsilon) \/ (?c. h = Charset c) \/ 429 (?r1 r2. h = r1+r2) \/ (?r1 r2. h = r1#r2) \/ 430 (?r. h = Repeat r)` by METIS_TAC [regexp_cases] 431 THEN RW_TAC list_ss [] THEN EVAL_TAC THEN FULL_SIMP_TAC list_ss [m_def] 432 THEN RW_TAC list_ss [m_def, match_seq_def, SIMP_RULE std_ss [] lem] 433 THEN METIS_TAC [list_CASES, m_def, CONS_ACYCLIC, CONS_11], 434 (*Step Case*) 435 `?rlist w1 s1. g = (rlist,w1,s1)` by METIS_TAC [ABS_PAIR_THM] 436 THEN `((rl = []) \/ ?h t. rl = h::t) /\ 437 ((w = []) \/ ?b y. w = b::y)` by METIS_TAC [list_CASES] 438 THEN `(h = Epsilon) \/ (?c. h = Charset c) \/ 439 (?r1 r2. h = r1+r2) \/ (?r1 r2. h = r1#r2) \/ 440 (?r. h = Repeat r)` by METIS_TAC [regexp_cases] 441 THEN NTAC 2 (RW_TAC list_ss [] THEN 442 FULL_SIMP_TAC list_ss [LET_THM, m_def, match_seq_def, 443 compose_m_seq_def])]); 444 445(*---------------------------------------------------------------------------*) 446(* Match sequence suffixes can be swapped out. *) 447(*---------------------------------------------------------------------------*) 448 449val match_seq_lemma = Q.prove 450(`!a x y z. match_seq (x++[a]++z) /\ match_seq ([a]++y) 451 ==> match_seq (x++[a]++y)`, 452 Induct_on `x` 453 THEN RW_TAC list_ss [] 454 THEN Cases_on `x` 455 THEN METIS_TAC [APPEND, match_seq_def]); 456 457 458(*---------------------------------------------------------------------------*) 459(* If r matches w, then we can inductively build up a match sequence that *) 460(* starts with ([r], w, NONE). *) 461(*---------------------------------------------------------------------------*) 462 463val lem = Q.prove 464(`!(r: 'a regexp list) (w:'a list) s r' w' s' x x'. 465 ?t. compose_m_seq ((r, w, s)::x) ((r', w', s')::x') 466 = 467 (r++r', w++w', OPTION_MAP (\e.e++r') s)::t`, 468 RW_TAC list_ss [compose_m_seq_def, LET_THM]); 469 470val sem_implies_match_seq_exists = Q.prove 471(`!r w. sem r w ==> ?x. match_seq (([r], w, NONE)::x)`, 472 Induct THENL 473 [RW_TAC list_ss [sem_def] 474 THEN Q.EXISTS_TAC `[([],[],NONE)]` 475 THEN RW_TAC list_ss [match_seq_def, m_def], 476 RW_TAC list_ss [sem_def] 477 THEN Q.EXISTS_TAC `[([],[],NONE)]` 478 THEN RW_TAC list_ss [match_seq_def, m_def], 479 RW_TAC list_ss [sem_def] THEN RES_TAC THENL 480 [Q.EXISTS_TAC `([r],w,NONE)::x` THEN RW_TAC list_ss [match_seq_def, m_def], 481 Q.EXISTS_TAC `([r'],w,NONE)::x` 482 THEN RW_TAC list_ss [match_seq_def, m_def]], 483 RW_TAC list_ss [sem_def] THEN RES_TAC 484 THEN Q.EXISTS_TAC `compose_m_seq (([r],w1,NONE)::x') (([r'],w2,NONE)::x)` 485 THEN STRIP_ASSUME_TAC 486 (Q.SPECL [`[r]`, `w1`, `NONE`, `[r']`, `w2`,`NONE`, `x'`, `x`] lem) 487 THEN FULL_SIMP_TAC list_ss [match_seq_def] 488 THEN METIS_TAC [m_def, compose_m_seq_thm], 489 SIMP_TAC list_ss [repeat_equals_ncat_thm, GSYM LEFT_FORALL_IMP_THM] 490 THEN Induct_on `n` THENL 491 [RW_TAC list_ss [FUNPOW] 492 THEN FULL_SIMP_TAC list_ss [sem_def] 493 THEN Q.EXISTS_TAC `[([],[],NONE)]` 494 THEN RW_TAC list_ss [match_seq_def, m_def], 495 FULL_SIMP_TAC list_ss [FUNPOW_SUC] 496 THEN RW_TAC list_ss [sem_def] 497 THEN `?x1 x2. match_seq (([r],w1,NONE)::x1) /\ 498 match_seq (([Repeat r],w2,NONE)::x2)` by METIS_TAC [] 499 THEN Q.EXISTS_TAC `change_stop_on_prefix 500 (compose_m_seq (([r],w1,NONE)::x1) 501 (([Repeat r],w2,NONE)::x2)) (SOME [Repeat r])` 502 THEN STRIP_ASSUME_TAC 503 (Q.SPECL [`[r]`, `w1`, `NONE`, `[Repeat r]`, 504 `w2`,`NONE`, `x1`, `x2`] lem) 505 THEN STRIP_ASSUME_TAC 506 (Q.SPECL [`SOME[Repeat r]`, `[r]++[Repeat r]`, 507 `w1++w2`, `NONE`, `t`] csp_step) 508 THEN FULL_SIMP_TAC list_ss [match_seq_def] 509 THEN METIS_TAC [m_def, compose_m_seq_thm, change_stop_on_prefix_thm]]]); 510 511 512(*---------------------------------------------------------------------------*) 513(* Given a match sequence starting with (rl,w,s), then (match rl w s) holds *) 514(* as long as the match sequence avoids cycles in the matcher. *) 515(*---------------------------------------------------------------------------*) 516 517val witness_implies_match_thm = 518let val thms = [match_def, match_seq_def, m_def] in 519Q.prove 520(`!rl w s x y. 521 (((rl,w,s)::y) = x) 522 ==> match_seq x /\ 523 (!r1 t1 w1. ~MEM (Repeat r1::t1, w1, SOME (Repeat r1::t1)) x) 524 ==> match rl w s`, 525 Induct_on `x` THEN RW_TAC list_ss [] THEN 526 `((x=[]) \/ ?h t. x = h::t) /\ 527 ((rl=[]) \/ (?rst. rl = Epsilon::rst) 528 \/ (?c rst. rl = Charset c::rst) 529 \/ (?r1 r2 rst. rl = (r1+r2)::rst) 530 \/ (?r1 r2 rst. rl = (r1#r2)::rst) 531 \/ (?r2 rst. rl = Repeat r2::rst))` 532 by METIS_TAC [list_CASES,regexp_cases] 533 THEN RW_TAC list_ss [] 534 THEN FULL_SIMP_TAC list_ss thms THENL 535 [Cases_on `w` THEN FULL_SIMP_TAC list_ss thms, 536 RW_TAC list_ss [] THEN METIS_TAC thms, 537 RW_TAC list_ss [] THEN METIS_TAC thms]) 538end; 539 540(*---------------------------------------------------------------------------*) 541(* Split a list in two around the first point where predicate P holds. *) 542(*---------------------------------------------------------------------------*) 543 544val split_def = 545 Define 546 `(split P [] acc = NONE) /\ 547 (split P (h::t) acc = if P h then SOME (acc, h, t) 548 else split P t (acc++[h]))`; 549 550val split_thm = Q.prove 551(`!P l acc l1 v l2. 552 (split P l acc = (SOME (l1, v, l2))) ==> (l1 ++ [v] ++ l2 = acc ++ l)`, 553 Induct_on `l` 554 THEN RW_TAC list_ss [split_def] 555 THEN METIS_TAC [APPEND, APPEND_ASSOC]); 556 557val split_thm2 = Q.prove 558(`!P l acc l1 v l2. 559 (split P l acc = (SOME (l1, v, l2))) ==> ?l3. (l1 = acc++l3)`, 560 Induct_on `l` 561 THEN RW_TAC list_ss [split_def] THENL 562 [Q.EXISTS_TAC `[]` THEN RW_TAC list_ss [], 563 RES_TAC THEN Q.EXISTS_TAC `[h]++l3` THEN RW_TAC list_ss []]); 564 565val split_thm3 = Q.prove 566(`!P l acc l1 v l2 l3. 567 (split P l (l3 ++ acc) = SOME (l3++l1, v, l2)) 568 = 569 (split P l acc = (SOME (l1, v, l2)))`, 570 Induct_on `l` 571 THEN RW_TAC list_ss [split_def] 572 THEN METIS_TAC [APPEND_ASSOC]); 573 574val split_thm4 = Q.prove 575(`!P l acc. (split P l acc = NONE) ==> ~EXISTS P l`, 576 Induct_on `l` 577 THEN RW_TAC list_ss [split_def] 578 THEN FULL_SIMP_TAC list_ss [] 579 THEN METIS_TAC []); 580 581val split_thm5 = Q.prove 582(`!P l acc acc2. (split P l acc = NONE) ==> (split P l acc2 = NONE)`, 583 Induct_on `l` 584 THEN RW_TAC list_ss [split_def] 585 THEN METIS_TAC [split_def]); 586 587val split_prop_thm = Q.prove 588(`!L P a b c acc. (split P L acc = SOME(a,b,c)) ==> P b`, 589 Induct 590 THEN RW_TAC list_ss [split_def] 591 THEN METIS_TAC []); 592 593val split_cong_thm = Q.prove 594(`!l P acc P' l' acc'. 595 (l=l') /\ (!x. MEM x l ==> (P x = P' x)) /\ (acc=acc') 596 ==> (split P l acc = split P' l' acc')`, 597 Induct_on `l` 598 THEN RW_TAC list_ss [split_def] 599 THEN FULL_SIMP_TAC list_ss [split_def]); 600 601(*---------------------------------------------------------------------------*) 602(* If a cycle exists in a match sequence, then the part of the sequence *) 603(* before the cycle contains the Repeat expressions that lead to the cycle. *) 604(*---------------------------------------------------------------------------*) 605 606val match_seq_find_lemma = Q.prove 607(`!ms l1 r l w z. 608 match_seq ms 609 ==> (split (\x. ?r l w. x = (Repeat r::l, w, SOME (Repeat r::l))) ms [] = 610 SOME (l1,(Repeat r::l,w,SOME(Repeat r::l)), z)) 611 ==> (~((SND(SND(HD ms))) = SOME (Repeat r::l)) \/ ~((FST(SND(HD ms))) = w)) 612 ==> ?s. MEM (Repeat r::l, w, s) l1`, 613 Induct 614 THEN FULL_SIMP_TAC list_ss [split_def, match_seq_def, m_def] 615 THEN REPEAT GEN_TAC 616 THEN `(l1 = []) \/ ?h1 t1. l1 = h1::t1` by METIS_TAC [list_CASES] 617 THEN RW_TAC list_ss [split_def, match_seq_def, m_def] 618 THEN TRY (IMP_RES_TAC split_thm2 THEN FULL_SIMP_TAC list_ss [] THEN NO_TAC) 619 (* 2 cases left, almost identical; do both cases with same tactic *) 620 THEN 621 (Q.PAT_ASSUM `$! M `(MP_TAC o Q.SPECL [`t1`, `r`, `l`, `w`, `z`]) 622 THEN RW_TAC list_ss [split_def, match_seq_def, m_def] 623 THEN `match_seq ms` by METIS_TAC [match_seq_down_closed, APPEND] 624 THEN FULL_SIMP_TAC list_ss [split_def, match_seq_def, m_def] 625 THEN IMP_RES_TAC split_thm2 626 THEN FULL_SIMP_TAC list_ss [] THEN RW_TAC list_ss [] 627 THEN ASSUME_TAC (Q.SPECL 628 [`\x. ?r l w. x = (Repeat r::l,w,SOME (Repeat r::l))`, 629 `ms`, `[]`, `l3`, `(Repeat r::l,w,SOME (Repeat r::l))`, `z`, 630 `[h]`] 631 (INST_TYPE [alpha |-> Type`:'a regexp list#'a list#'a regexp list option`] 632 split_thm3)) 633 THEN FULL_SIMP_TAC list_ss [] 634 THEN RES_TAC THEN FULL_SIMP_TAC list_ss [] THEN RW_TAC list_ss [] 635 THEN Cases_on `~(SND (SND (HD ms)) = SOME (Repeat r::l))` 636 THENL [METIS_TAC [],FULL_SIMP_TAC list_ss [] THEN RW_TAC list_ss []] 637 THEN Cases_on `~(FST (SND (HD ms)) = w)` 638 THENL [METIS_TAC [],FULL_SIMP_TAC list_ss [] THEN RW_TAC list_ss []] 639 THEN `(ms = []) \/ ?a b c mst. ms = (a,b,c)::mst` 640 by METIS_TAC [list_CASES,ABS_PAIR_THM] 641 THENL [FULL_SIMP_TAC list_ss [split_def, match_seq_def, m_def], 642 FULL_SIMP_TAC list_ss [] THEN RW_TAC list_ss [] 643 THEN `?a1 b1 c1. h = (a1,b1,c1)` by METIS_TAC [ABS_PAIR_THM] 644 THEN RW_TAC list_ss [] THEN FULL_SIMP_TAC list_ss [match_seq_def] 645 THEN METIS_TAC [m_thm,m_thm2,NOT_SOME_NONE]])); 646 647 648(*---------------------------------------------------------------------------*) 649(* Normalize a match sequence to cut the cycles out of it. *) 650(* To remove a cycle, find it and the regexp that started the cycle and cut *) 651(* out the intermediate parts of the match sequence. *) 652(*---------------------------------------------------------------------------*) 653 654val NS_DEF = 655 tDefine 656 "NS" 657 `NS ms = 658 case split (\x. ?r l w. x = (Repeat r::l, w, SOME (Repeat r::l))) ms [] of 659 NONE -> ms 660 || SOME (l1, (bad_r, bad_w, bad_s), z) -> 661 let opt = split (\x. ?s. x = (bad_r, bad_w, s)) l1 [] 662 in if opt = NONE then [] 663 else 664 let (x1, (rl, w1, s1), y) = THE opt in 665 if (?ra L. (rl = Repeat ra::L) /\ (FST(HD z) = L)) 666 then NS (x1 ++ [(rl,w1,s1)] ++ change_stop_on_prefix z s1) 667 else NS (x1 ++ [(rl,w1,s1)] ++ z)` 668(WF_REL_TAC `measure LENGTH` 669 THEN RW_TAC list_ss [LENGTH_CSP] 670 THEN IMP_RES_TAC split_thm THEN FULL_SIMP_TAC list_ss [] 671 THEN RW_TAC list_ss [] 672 THEN Cases_on `split (\(p1,p1',p2). (p1 = bad_r) /\ (p1' = bad_w)) l1 []` 673 THEN RW_TAC list_ss [] 674 THEN FULL_SIMP_TAC list_ss [] 675 THEN Cases_on `x` THEN Cases_on `r` 676 THEN IMP_RES_TAC split_thm THEN FULL_SIMP_TAC list_ss [] 677 THEN RW_TAC list_ss []); 678 679val NS_IND = fetch "-" "NS_ind"; 680 681(*---------------------------------------------------------------------------*) 682(* One step of NS preserves match sequences *) 683(*---------------------------------------------------------------------------*) 684 685val match_seq_surg_thm = Q.prove 686(`!ms l1 bad_r bad_w z x1 rl w1 s1 y. 687 match_seq ms ==> 688 (split (\x. ?r l w. x = (Repeat r::l, w, SOME (Repeat r::l))) ms [] = 689 SOME (l1, (bad_r, bad_w, bad_s), z)) ==> 690 (split (\x. ?s. x = (bad_r, bad_w, s)) l1 [] = 691 SOME (x1, (rl, w1, s1), y)) ==> 692 if (?ra L. (rl = Repeat ra::L) /\ (FST(HD z) = L)) then 693 match_seq (x1 ++ [(rl, w1, s1)] ++ change_stop_on_prefix z s1) 694 else 695 match_seq (x1 ++ [(rl, w1, s1)] ++ z)`, 696 Induct THENL 697 [RW_TAC list_ss [] 698 THEN IMP_RES_TAC split_thm 699 THEN FULL_SIMP_TAC list_ss [], 700 REPEAT STRIP_TAC 701 THEN `(bad_r = rl) /\ (bad_w = w1)` 702 by (IMP_RES_TAC split_prop_thm THEN POP_ASSUM (K ALL_TAC) 703 THEN FULL_SIMP_TAC list_ss [LAMBDA_PROD]) 704 THEN IMP_RES_TAC split_thm THEN FULL_SIMP_TAC list_ss [] 705 THEN `match_seq (x1 ++ [(rl,w1,s1)] ++ y ++ [(rl,w1,bad_s)] ++ z)` 706 by METIS_TAC [] 707 THEN `(z=[]) \/ ?a b c t1. z = (a,b,c)::t1` 708 by METIS_TAC [list_CASES,ABS_PAIR_THM] THENL 709 [FULL_SIMP_TAC list_ss [match_seq_def] 710 THEN `match_seq [(bad_r,bad_w,bad_s)]` 711 by METIS_TAC [match_seq_down_closed] 712 THEN FULL_SIMP_TAC list_ss [match_seq_def] 713 THEN RW_TAC list_ss [change_stop_on_prefix_def, match_seq_def] 714 THEN `match_seq ([([],[],s1)] ++ y ++ [([],[],bad_s)])` 715 by METIS_TAC [match_seq_down_closed,APPEND_ASSOC] 716 THEN Cases_on `y` THEN FULL_SIMP_TAC list_ss [match_seq_def,m_def], 717 RW_TAC list_ss [] THENL 718 [`match_seq (change_stop_on_prefix ((a,b,c)::t1) s1)` 719 by METIS_TAC [match_seq_down_closed,change_stop_on_prefix_thm] 720 THEN ASSUME_TAC (Q.SPECL 721 [`(Repeat ra::a,bad_w,s1)`, `x1`, 722 `change_stop_on_prefix ((a,b,c)::t1) s1`, 723 `y ++ [(Repeat ra::a,bad_w,bad_s)] ++ ((a,b,c)::t1)`] 724 match_seq_lemma) 725 THEN FIRST_ASSUM MATCH_MP_TAC THEN RW_TAC list_ss [] 726 THEN `?t2. change_stop_on_prefix ((a,b,c)::t1) s1 = (a,b,s1)::t2` 727 by METIS_TAC[csp_step] 728 THEN POP_ASSUM SUBST_ALL_TAC 729 THEN RW_TAC list_ss [match_seq_def] 730 THEN `m (Repeat ra::a,bad_w,bad_s) (a,b,c)` 731 by METIS_TAC [match_seq_down_closed,APPEND,APPEND_ASSOC, 732 match_seq_def] 733 THEN FULL_SIMP_TAC list_ss [m_def], 734 ASSUME_TAC (Q.SPECL 735 [`(bad_r,bad_w,s1)`, `x1`, `(a,b,c)::t1`, 736 `y ++ [(bad_r,bad_w,bad_s)] ++ ((a,b,c)::t1)`] match_seq_lemma) 737 THEN FIRST_ASSUM MATCH_MP_TAC THEN RW_TAC list_ss [match_seq_def] 738 THEN `m (bad_r,bad_w,bad_s) (a,b,c)` 739 by METIS_TAC [match_seq_down_closed,APPEND,APPEND_ASSOC, 740 match_seq_def] 741 THEN `?r l. bad_r = Repeat r::l` by IMP_RES_TAC split_prop_thm 742 THEN POP_ASSUM MP_TAC THEN EVAL_TAC THEN RW_TAC list_ss [] 743 THENL [FULL_SIMP_TAC list_ss [m_def], 744 METIS_TAC [match_seq_down_closed]]]]]); 745 746val NS_match_thm = Q.prove 747(`!ms. match_seq ms ==> match_seq (NS ms)`, 748 recInduct NS_IND THEN RW_TAC list_ss [] 749 THEN ONCE_REWRITE_TAC [NS_DEF] THEN CASE_TAC THENL 750 [METIS_TAC [], 751 REPEAT CASE_TAC 752 THEN RW_TAC list_ss [match_seq_def, LET_THM] 753 THEN REPEAT (POP_ASSUM MP_TAC) 754 THEN RENAME_FREES_TAC [("q","l1"), ("r'","l2"), 755 ("q''","a"),("q'","b"),("r''","c")] 756 THEN REPEAT STRIP_TAC 757 THEN `(split (\x. ?s. x = (a,b,s)) l1 [] = NONE) \/ 758 ?a1 b1a b1b b1c c1. 759 split (\x. ?s. x = (a,b,s)) l1 [] = SOME (a1,(b1a,b1b,b1c),c1)` 760 by METIS_TAC [optionTheory.option_nchotomy,ABS_PAIR_THM] 761 THEN RW_TAC list_ss [match_seq_def] 762 THEN IMP_RES_TAC match_seq_surg_thm 763 THEN FULL_SIMP_TAC list_ss [] 764 THEN RW_TAC list_ss [] THEN METIS_TAC []]); 765 766val NS_is_normal_thm = Q.prove 767(`!ms r w t. ~MEM (Repeat r::t, w, SOME (Repeat r::t)) (NS ms)`, 768 recInduct NS_IND THEN RW_TAC list_ss [] 769 THEN ONCE_REWRITE_TAC [NS_DEF] 770 THEN CASE_TAC THENL 771 [IMP_RES_TAC split_thm4 772 THEN FULL_SIMP_TAC list_ss [] THEN RW_TAC list_ss [] 773 THEN Induct_on `ms` 774 THEN RW_TAC list_ss [] 775 THEN FULL_SIMP_TAC list_ss [split_def] 776 THEN METIS_TAC [split_thm5], 777 REPEAT CASE_TAC THEN RW_TAC list_ss [LET_THM] 778 THEN REPEAT (POP_ASSUM MP_TAC) 779 THEN RENAME_FREES_TAC [("q","l1"), ("r''","l2"), 780 ("q''","a"),("q'","b"),("r'''","c")] 781 THEN NTAC 4 STRIP_TAC 782 THEN `(split (\x. ?s. x = (a,b,s)) l1 [] = NONE) \/ 783 ?a1 b1a b1b b1c c1. 784 split (\x. ?s. x = (a,b,s)) l1 [] = SOME (a1,(b1a,b1b,b1c),c1)` 785 by METIS_TAC [optionTheory.option_nchotomy,ABS_PAIR_THM] 786 THEN FULL_SIMP_TAC list_ss [] 787 THEN RW_TAC list_ss [] 788 THEN FULL_SIMP_TAC list_ss [] 789 THEN METIS_TAC []]); 790 791val lem = Q.prove 792(`EXISTS (\y. y = (a,b,c)) L ==> EXISTS (\y. ?c. y = (a,b,c)) L`, 793Induct_on `L` THEN EVAL_TAC THEN RW_TAC list_ss [] THEN METIS_TAC[]); 794 795(*---------------------------------------------------------------------------*) 796(* Normalizing a match sequence doesn't change the first element *) 797(*---------------------------------------------------------------------------*) 798 799val NS_HD_THM = Q.prove 800(`!t t' r w. (t = (r,w,NONE)::t') ==> 801 match_seq t ==> 802 ?t''. (NS t = (r, w, NONE)::t'')`, 803 recInduct NS_IND THEN RW_TAC list_ss [] 804 THEN ONCE_REWRITE_TAC [NS_DEF] 805 THEN REPEAT CASE_TAC 806 THEN REPEAT (POP_ASSUM MP_TAC) 807 THEN RENAME_FREES_TAC [("q","l1"), ("r''","l2"), 808 ("q''","a"),("q'","b"),("r'''","c")] 809 THEN NTAC 4 STRIP_TAC 810 THEN RW_TAC list_ss [LET_THM] THENL 811 [IMP_RES_TAC split_prop_thm THEN FULL_SIMP_TAC list_ss [] 812 THEN IMP_RES_TAC match_seq_find_lemma THEN FULL_SIMP_TAC list_ss [] 813 THEN IMP_RES_TAC split_thm4 814 THEN `(a=[]) \/ ?d u. a = d::u` by METIS_TAC [list_CASES] 815 THEN RW_TAC list_ss [] THEN METIS_TAC [MEM_EXISTS_LEM,lem], 816 `?a1 b1a b1b b1c c1. 817 split (\x. ?s. x = (a,b,s)) l1 [] = SOME (a1,(b1a,b1b,b1c),c1)` 818 by METIS_TAC [optionTheory.option_nchotomy,ABS_PAIR_THM] 819 THEN Cases_on `l1` THEN FULL_SIMP_TAC list_ss [] 820 THENL [FULL_SIMP_TAC list_ss [split_def], ALL_TAC] 821 THEN `h = (r,w,NONE)` by IMP_RES_TAC split_thm THEN FULL_SIMP_TAC list_ss [] 822 THEN IMP_RES_TAC match_seq_surg_thm 823 THEN Cases_on `a1` THEN RW_TAC list_ss [match_seq_def] 824 THEN IMP_RES_TAC split_thm THEN FULL_SIMP_TAC list_ss []]); 825 826(*---------------------------------------------------------------------------*) 827(* The semantics imply match. *) 828(*---------------------------------------------------------------------------*) 829 830val sem_implies_match = Q.prove 831(`!r w. sem r w ==> match [r] w NONE`, 832 METIS_TAC [sem_implies_match_seq_exists, NS_match_thm, 833 NS_is_normal_thm, NS_HD_THM, witness_implies_match_thm]); 834 835(*---------------------------------------------------------------------------*) 836(* match correctly implements the semantics. *) 837(*---------------------------------------------------------------------------*) 838 839val match_is_correct = Q.prove 840(`!r w. sem r w = match [r] w NONE`, 841 REPEAT (STRIP_TAC ORELSE EQ_TAC) THENL 842 [RW_TAC list_ss [sem_implies_match], 843 IMP_RES_TAC match_implies_sem THEN FULL_SIMP_TAC list_ss [FOLDR,sem_def]]); 844 845val _ = Count.report (Count.read thm_counter); 846 847