1open HolKernel boolLib bossLib Parse; 2 3open listTheory; 4open rich_listTheory; 5open prim_recTheory; 6open arithmeticTheory; 7open pred_setTheory; 8open pairTheory; 9open boolTheory; 10 11load "set_lemmasTheory"; 12open set_lemmasTheory; 13 14val _ = new_theory"boyer_moore_spec"; 15 16(* 17 SOLUTION DEFINITION AND BEHAVIOUR 18 --------------------------------- 19 *) 20(* -- SOLUTION SET -- *) 21(* Formal Definition of potential solutions. *) 22val solutions_def = 23 Define 24 ` 25 solutions pat search = 26 if 27 ~(LENGTH pat <= LENGTH search) 28 then 29 {} 30 else 31 {k| k <= LENGTH search - LENGTH pat 32 /\ (!i. i < LENGTH pat 33 ==> (EL (i+k) search = EL i pat) 34 ) 35 } 36 `; 37 38(* The capacity to skip and produce the same solution set *) 39val SOLS_SHF_THM = Q.prove( 40 `(m <= LENGTH search) /\ 41 (!d. d < m ==> ~(d IN solutions pat search)) 42 ==> (!x. x IN solutions pat (DROP m search) 43 <=> (m+x) IN solutions pat search)`, 44 Cases_on `LENGTH search = 0` 45 >- (strip_tac 46 >> `m=0` 47 by simp[] 48 >> simp[]) 49 >- (rw[solutions_def] 50 >- fs[] 51 >- (rw[EQ_IMP_THM] 52 >> `EL (i+x) (DROP m search) = EL ((i+x)+m) search` 53 suffices_by rw[ADD_CLAUSES] 54 >> `((i + x) + m) < LENGTH search` 55 suffices_by metis_tac[EL_DROP] 56 >> `i + x + m <= i + (LENGTH search - (m + LENGTH pat)) + m` 57 by rw[] 58 >> `i + (LENGTH search - (m + LENGTH pat)) + m 59 < LENGTH pat + (LENGTH search - (m + LENGTH pat)) + m` 60 by rw[] 61 >> `LENGTH pat + (LENGTH search - (m + LENGTH pat)) + m 62 = LENGTH search` 63 suffices_by rw[LESS_EQ_LESS_TRANS] 64 >> simp[]) 65 ) 66 ); 67 68(* Shifting capacity of the minimum of the solution set *) 69val SOLS_MIN_SHF = Q.prove( 70 `(m <= LENGTH search) /\ 71 (!d. d < m ==> ~(d IN solutions pat search)) /\ 72 (solutions pat (DROP m search) <> {}) /\ 73 (solutions pat search <> {}) 74 ==> (m + MIN_SET (solutions pat (DROP m search)) 75 = MIN_SET (solutions pat search))`, 76 REPEAT STRIP_TAC 77 >> qabbrev_tac `sols_d = solutions pat (DROP m search)` 78 >> qabbrev_tac `sols = solutions pat search` 79 >> `!x. x IN sols_d <=> m + x IN sols` 80 by rw[SOLS_SHF_THM, Abbr `sols`, Abbr `sols_d`] 81 >> `(m + MIN_SET sols_d <= MIN_SET sols) /\ 82 (MIN_SET sols <= m + MIN_SET sols_d)` 83 suffices_by rw[LESS_EQUAL_ANTISYM] 84 >> STRIP_TAC 85 >- (`MIN_SET sols IN sols` 86 by fs[MIN_SET_LEM] 87 >> `m <= MIN_SET sols` 88 by (CCONTR_TAC 89 >> (`?d. (d < m) /\ (d IN sols)` 90 suffices_by metis_tac[]) 91 >> qexists_tac `MIN_SET sols` 92 >> fs[]) 93 >> `(MIN_SET sols - m) IN sols_d` 94 by rw[ADD_CLAUSES] 95 >> `MIN_SET sols_d <= MIN_SET sols - m` 96 by rw[MIN_SET_LEM] 97 >> fs[MIN_SET_LEM]) 98 >- (`MIN_SET sols_d IN sols_d` 99 by fs[MIN_SET_LEM] 100 >> `m + (MIN_SET sols_d) IN sols` 101 by metis_tac[] 102 >> fs[MIN_SET_LEM]) 103 ); 104 105(* Shifting a solution set appropriately does not change 106 its nullness *) 107val SOLS_NULL_EQ = Q.prove( 108 `(m <= LENGTH search) /\ 109 (!d. d < m ==> ~(d IN solutions pat search)) 110 ==> ((solutions pat (DROP m search) = {}) 111 <=> (solutions pat search = {}))`, 112 REPEAT STRIP_TAC 113 >> rw[EQ_IMP_THM] 114 >> qabbrev_tac `sols_d = solutions pat (DROP m search)` 115 >> qabbrev_tac `sols = solutions pat search` 116 >> `!x. x IN sols_d <=> m + x IN sols` 117 by rw[SOLS_SHF_THM, Abbr `sols`, Abbr `sols_d`] 118 >- (CCONTR_TAC 119 >> `?y. y IN sols` by metis_tac[MEMBER_NOT_EMPTY] 120 >> `m<=y` 121 by (CCONTR_TAC 122 >> `?d. (d < m) /\ (d IN sols)` 123 suffices_by metis_tac[] 124 >> qexists_tac `y` 125 >> fs[]) 126 >> `(y - m) IN sols_d` 127 by rw[ADD_CLAUSES] 128 >> `?z. z IN sols_d` 129 suffices_by metis_tac[MEMBER_NOT_EMPTY] 130 >> qexists_tac `y-m` 131 >> fs[]) 132 >- (CCONTR_TAC 133 >> `?y. y IN sols_d` 134 by metis_tac[MEMBER_NOT_EMPTY] 135 >> `m + y IN sols` 136 by metis_tac[ADD_CLAUSES] 137 >> `?z. z IN sols` 138 suffices_by metis_tac[MEMBER_NOT_EMPTY] 139 >> qexists_tac `m + y` 140 >> fs[]) 141 ); 142 143 144 145 146 147 148 149(* -- FIRST SOLUTION -- *) 150(* The actual solution we want is the minimum of all 151 solutions. We include past the end of 152 the search string a 'solution'. It is the failure indicator. *) 153val solution_def = 154 Define 155 ` 156 solution pat search = MIN_SET ((LENGTH search) INSERT 157 solutions pat search) 158 `; 159 160(* Shifting capacity of the solution*) 161val SOL_SHF_THM = store_thm( 162 "SOL_SHF_THM", 163 ``(LENGTH pat <= LENGTH search) /\ 164 (m <= LENGTH search) /\ 165 (!d. d < m ==> ~(d IN solutions pat search)) 166 ==> (m + solution pat (DROP m search) 167 = solution pat search)``, 168 rw[solution_def] 169 >> Cases_on `solutions pat search = {}` 170 >- (`solutions pat (DROP m search) = {}` 171 by rw[SOLS_NULL_EQ] 172 >> simp[MIN_SET_THM]) 173 >- (qabbrev_tac `sols_d = solutions pat (DROP m search)` 174 >> qabbrev_tac `sols = solutions pat search` 175 >> qabbrev_tac `L = LENGTH search` 176 >> qabbrev_tac `P = LENGTH pat` 177 >> `sols_d <> {}` 178 by rw[SOLS_NULL_EQ, Abbr `sols`, Abbr `sols_d`] 179 >> `MIN_SET (L - m INSERT sols_d) = MIN (L-m) (MIN_SET sols_d)` 180 by (Cases_on `sols_d` >> metis_tac[MIN_SET_THM]) 181 >> `MIN_SET (L INSERT sols) = MIN L (MIN_SET sols)` 182 by (Cases_on `sols` >> metis_tac[MIN_SET_THM]) 183 >> `MIN_SET sols <= L` 184 by rw[Abbr `sols`, Abbr `L`, solutions_def, 185 MIN_SET_UPPER_BOUND, DECIDE ``a <= b - c ==> a <= b``] 186 >> fs[MIN_DEF,SOLS_MIN_SHF, Abbr `sols_d`, Abbr `sols`]) 187 ); 188 189 190 191 192(* -- CHARACTER MISMATCH SHIFTS -- *) 193(* Formal Definition of Valid Character Shifts *) 194val valid_cha_shifts_def = 195 Define 196 ` 197 valid_cha_shifts pat all_chars j a = 198 (j+1) INSERT {d | 1 <= d /\ d <= j 199 /\ (EL (j-d) pat = EL a all_chars)} 200 `; 201 202(* Confirmation that a valid character shift exists *) 203val CHA_SHIFT_EXISTS_THM = store_thm( 204 "CHA_SHIFT_EXISTS_THM", 205 ``valid_cha_shifts pat all_chars j a <> {}``, 206 rw[valid_cha_shifts_def] 207 ); 208 209(* Confirmation that skipped shifts not in valid_cha_shifts give 210 invalid alignments *) 211val CHA_SKIP_NOT_SOL = store_thm( 212 "CHA_SKIP_NOT_SOL", 213 ``((k <= LENGTH search - LENGTH pat) 214 /\ (j < LENGTH pat) 215 /\ (EL j pat <> EL (k+j) search) 216 /\ (EL (k+j) search = EL a all_chars) 217 ) 218 ==> (!d. d < MIN_SET (valid_cha_shifts pat all_chars j a) 219 ==> ~((k+d) IN solutions pat search))``, 220 DISCH_TAC 221 >> fs[] 222 >> rw[] 223 >> `d NOTIN valid_cha_shifts pat all_chars j a` 224 by metis_tac[CHA_SHIFT_EXISTS_THM, MIN_SET_LEM, 225 DECIDE ``a < b /\ b <= a ==> F``] 226 >> `d < j + 1` 227 by (irule MIN_SET_IS_LOWER_BOUND 228 >> rpt conj_tac 229 >> qexists_tac `valid_cha_shifts pat all_chars j a` 230 >> rw[valid_cha_shifts_def]) 231 >> fs[valid_cha_shifts_def] 232 >- (`d + k = k` by fs[] 233 >> `k NOTIN solutions pat search` 234 suffices_by rw[] 235 >> rw[solutions_def] 236 >> `EL (j + k) search <> EL j pat` 237 by metis_tac[EQ_SYM_EQ] 238 >> metis_tac[]) 239 >- (rw[solutions_def] 240 >> DISJ2_TAC 241 >> qexists_tac `j - d` 242 >> simp[ADD_CLAUSES]) 243 ); 244 245 246(* -- SUFFIX MATCH SHIFTS -- *) 247(* Formal Definition of Valid Suffix Shifts *) 248val valid_suf_shifts_def = 249 Define 250 ` 251 valid_suf_shifts pat j = 252 {d | 1 <= d /\ d <= LENGTH pat 253 /\ (!i. (MAX (j+1) d <= i) /\ (i <= LENGTH pat - 1) 254 ==> (EL (i-d) pat = EL i pat)) 255 /\ ((d >= j+1) \/ (EL (j-d) pat <> EL j pat)) 256 } 257 `; 258 259(* Confirmation that a valid suffix shift exists in correct circumstances *) 260val SUF_SHIFT_EXISTS_THM = store_thm( 261 "SUF_SHIFT_EXISTS_THM", 262 ``j < LENGTH pat ==> valid_suf_shifts pat j <> {}``, 263 rw[valid_suf_shifts_def] 264 >> Cases_on `pat` 265 >- fs[] 266 >-(simp[EXTENSION] 267 >> qexists_tac `SUC (LENGTH t)` 268 >> simp[] 269 >> fs[]) 270 ); 271 272(* Confirmation that skipped shifts not in valid_suf_shifts give 273 invalid alignments *) 274val SUF_SKIP_NOT_SOL = store_thm( 275 "SUF_SKIP_NOT_SOL", 276 ``((k <= LENGTH search - LENGTH pat) /\ 277 (j < LENGTH pat) 278 /\ (!i. (j<i /\ i < LENGTH pat) 279 ==> (EL i pat = EL (k+i) search)) 280 /\ (EL j pat <> EL (k+j) search) 281 ) 282 ==> (!d. d < MIN_SET (valid_suf_shifts pat j) 283 ==> ~((k+d) IN solutions pat search) 284 ) 285 ``, 286 rw[solutions_def] 287 >> Cases_on `d = 0` 288 >- (simp[] >> metis_tac[]) 289 >- (`d > 0` 290 by rw[NOT_ZERO_LT_ZERO] 291 >> Cases_on `(d < j+1) /\ (EL (j- d) pat = EL j pat)` 292 >- (qabbrev_tac `q = j - d` 293 >> `q < LENGTH pat` 294 by simp[ADD_CLAUSES, Abbr `q`] 295 >> `EL (q + d) pat <> EL (d + (q + k)) search` 296 by rw[Abbr `q`] 297 >> `EL q pat = EL (q + d) pat` 298 by rw[Abbr `q`] 299 >> Cases_on `d + k <= LENGTH search - LENGTH pat` 300 >> fs[] 301 >> `EL (d + (k + q)) search = EL (d + (q + k)) search` 302 by rw[ADD_CLAUSES] 303 >> `EL q pat <> EL (d + (q + k)) search` 304 by metis_tac[] 305 >> qexists_tac `q` 306 >> rw[]) 307 >- (`d NOTIN valid_suf_shifts pat j` 308 by metis_tac[SUF_SHIFT_EXISTS_THM, 309 MIN_SET_LEM, 310 DECIDE ``a < b /\ b <= a 311 ==> F``] 312 >> pop_assum mp_tac 313 >> simp[valid_suf_shifts_def] 314 >> reverse (rw []) 315 >- fs[] 316 >- (DISJ2_TAC 317 >> qexists_tac `i - d` 318 >> simp[]) 319 >-(`MIN_SET (valid_suf_shifts pat j) <= LENGTH pat` 320 suffices_by simp[] 321 >> irule MIN_SET_UPPER_BOUND 322 >> rpt conj_tac 323 >- rw[valid_suf_shifts_def] 324 >- simp[SUF_SHIFT_EXISTS_THM]) 325 ) 326 ) 327 ); 328 329val _ = export_theory();