1open HolKernel boolLib bossLib Parse;
2
3open listTheory;
4open rich_listTheory;
5open prim_recTheory;
6open arithmeticTheory;
7open pred_setTheory;
8open pairTheory;
9open boolTheory;
10
11load "set_lemmasTheory";
12open set_lemmasTheory;
13
14val _ = new_theory"boyer_moore_spec";
15
16(*
17    SOLUTION DEFINITION AND BEHAVIOUR
18    ---------------------------------
19                                        *)
20(* -- SOLUTION SET -- *)
21(* Formal Definition of potential solutions. *)
22val solutions_def =
23    Define
24    `
25    solutions pat search =
26        if
27            ~(LENGTH pat <= LENGTH search)
28        then
29            {}
30        else
31            {k| k <= LENGTH search - LENGTH pat
32                /\ (!i. i < LENGTH pat
33                        ==> (EL (i+k) search = EL i pat)
34                    )
35            }
36    `;
37
38(* The capacity to skip and produce the same solution set *)
39val SOLS_SHF_THM = Q.prove(
40    `(m <= LENGTH search) /\
41       (!d. d < m ==> ~(d IN solutions pat search))
42        ==> (!x. x IN solutions pat (DROP m search)
43             <=> (m+x) IN solutions pat search)`,
44    Cases_on `LENGTH search = 0`
45    >- (strip_tac
46        >> `m=0`
47                by simp[]
48        >> simp[])
49    >- (rw[solutions_def]
50        >- fs[]
51        >- (rw[EQ_IMP_THM]
52            >> `EL (i+x) (DROP m search) = EL ((i+x)+m) search`
53                    suffices_by rw[ADD_CLAUSES]
54            >> `((i + x) + m) < LENGTH search`
55                    suffices_by metis_tac[EL_DROP]
56            >> `i + x + m <= i + (LENGTH search - (m + LENGTH pat)) + m`
57                    by rw[]
58            >> `i + (LENGTH search - (m + LENGTH pat)) + m
59                < LENGTH pat + (LENGTH search - (m + LENGTH pat)) + m`
60                    by rw[]
61            >> `LENGTH pat + (LENGTH search - (m + LENGTH pat)) + m
62                = LENGTH search`
63                    suffices_by rw[LESS_EQ_LESS_TRANS]
64            >> simp[])
65        )
66    );
67
68(* Shifting capacity of the minimum of the solution set *)
69val SOLS_MIN_SHF = Q.prove(
70    `(m <= LENGTH search) /\
71     (!d. d < m ==> ~(d IN solutions pat search)) /\
72     (solutions pat (DROP m search) <> {}) /\
73     (solutions pat search <> {})
74     ==> (m + MIN_SET (solutions pat (DROP m search))
75          = MIN_SET (solutions pat search))`,
76    REPEAT STRIP_TAC
77    >> qabbrev_tac `sols_d = solutions pat (DROP m search)`
78    >> qabbrev_tac `sols = solutions pat search`
79    >> `!x. x IN sols_d <=> m + x IN sols`
80            by rw[SOLS_SHF_THM, Abbr `sols`, Abbr `sols_d`]
81    >> `(m + MIN_SET sols_d <= MIN_SET sols) /\
82        (MIN_SET sols <= m + MIN_SET sols_d)`
83            suffices_by rw[LESS_EQUAL_ANTISYM]
84    >> STRIP_TAC
85    >- (`MIN_SET sols IN sols`
86            by fs[MIN_SET_LEM]
87        >> `m <= MIN_SET sols`
88                by (CCONTR_TAC
89                    >> (`?d. (d < m) /\ (d IN sols)`
90                            suffices_by metis_tac[])
91                    >> qexists_tac `MIN_SET sols`
92                    >> fs[])
93        >> `(MIN_SET sols - m) IN sols_d`
94                by rw[ADD_CLAUSES]
95        >> `MIN_SET sols_d <= MIN_SET sols - m`
96                by rw[MIN_SET_LEM]
97        >> fs[MIN_SET_LEM])
98    >- (`MIN_SET sols_d IN sols_d`
99            by fs[MIN_SET_LEM]
100        >> `m + (MIN_SET sols_d) IN sols`
101                by metis_tac[]
102        >> fs[MIN_SET_LEM])
103    );
104
105(* Shifting a solution set appropriately does not change
106   its nullness *)
107val SOLS_NULL_EQ = Q.prove(
108    `(m <= LENGTH search) /\
109     (!d. d < m ==> ~(d IN solutions pat search))
110     ==> ((solutions pat (DROP m search) = {})
111           <=> (solutions pat search = {}))`,
112    REPEAT STRIP_TAC
113    >> rw[EQ_IMP_THM]
114    >> qabbrev_tac `sols_d = solutions pat (DROP m search)`
115    >> qabbrev_tac `sols = solutions pat search`
116    >> `!x. x IN sols_d <=> m + x IN sols`
117            by rw[SOLS_SHF_THM, Abbr `sols`, Abbr `sols_d`]
118    >- (CCONTR_TAC
119        >> `?y. y IN sols` by metis_tac[MEMBER_NOT_EMPTY]
120        >> `m<=y`
121                by (CCONTR_TAC
122                    >> `?d. (d < m) /\ (d IN sols)`
123                            suffices_by metis_tac[]
124                    >> qexists_tac `y`
125                    >> fs[])
126        >> `(y - m) IN sols_d`
127                by rw[ADD_CLAUSES]
128        >> `?z. z IN sols_d`
129                suffices_by metis_tac[MEMBER_NOT_EMPTY]
130        >> qexists_tac `y-m`
131        >> fs[])
132    >- (CCONTR_TAC
133        >> `?y. y IN sols_d`
134                by metis_tac[MEMBER_NOT_EMPTY]
135        >> `m + y IN sols`
136                by metis_tac[ADD_CLAUSES]
137        >> `?z. z IN sols`
138                suffices_by metis_tac[MEMBER_NOT_EMPTY]
139        >> qexists_tac `m + y`
140        >> fs[])
141    );
142
143
144
145
146
147
148
149(* -- FIRST SOLUTION -- *)
150(* The actual solution we want is the minimum of all
151   solutions. We include past the end of
152   the search string a 'solution'. It is the failure indicator. *)
153val solution_def =
154    Define
155    `
156    solution pat search = MIN_SET ((LENGTH search) INSERT
157                                   solutions pat search)
158    `;
159
160(* Shifting capacity of the solution*)
161val SOL_SHF_THM = store_thm(
162    "SOL_SHF_THM",
163    ``(LENGTH pat <= LENGTH search) /\
164     (m <= LENGTH search) /\
165     (!d. d < m ==> ~(d IN solutions pat search))
166     ==> (m + solution pat (DROP m search)
167          = solution pat search)``,
168    rw[solution_def]
169    >> Cases_on `solutions pat search = {}`
170    >- (`solutions pat (DROP m search) = {}`
171            by rw[SOLS_NULL_EQ]
172         >> simp[MIN_SET_THM])
173    >-  (qabbrev_tac `sols_d = solutions pat (DROP m search)`
174         >> qabbrev_tac `sols = solutions pat search`
175         >> qabbrev_tac `L = LENGTH search`
176         >> qabbrev_tac `P = LENGTH pat`
177         >> `sols_d <> {}`
178                 by rw[SOLS_NULL_EQ, Abbr `sols`, Abbr `sols_d`]
179         >> `MIN_SET (L - m INSERT sols_d) = MIN (L-m) (MIN_SET sols_d)`
180                 by (Cases_on `sols_d` >> metis_tac[MIN_SET_THM])
181         >> `MIN_SET (L INSERT sols) = MIN L (MIN_SET sols)`
182                 by (Cases_on `sols` >> metis_tac[MIN_SET_THM])
183         >> `MIN_SET sols <= L`
184                 by rw[Abbr `sols`, Abbr `L`, solutions_def,
185                       MIN_SET_UPPER_BOUND, DECIDE ``a <= b - c ==> a <= b``]
186         >> fs[MIN_DEF,SOLS_MIN_SHF, Abbr `sols_d`, Abbr `sols`])
187    );
188
189
190
191
192(* -- CHARACTER MISMATCH SHIFTS -- *)
193(* Formal Definition of Valid Character Shifts *)
194val valid_cha_shifts_def =
195   Define
196   `
197   valid_cha_shifts pat all_chars j a =
198        (j+1) INSERT {d | 1 <= d /\ d <= j
199                          /\ (EL (j-d) pat = EL a all_chars)}
200   `;
201
202(* Confirmation that a valid character shift exists *)
203val CHA_SHIFT_EXISTS_THM = store_thm(
204    "CHA_SHIFT_EXISTS_THM",
205    ``valid_cha_shifts pat all_chars j a <> {}``,
206    rw[valid_cha_shifts_def]
207    );
208
209(* Confirmation that skipped shifts not in valid_cha_shifts give
210   invalid alignments *)
211val CHA_SKIP_NOT_SOL = store_thm(
212    "CHA_SKIP_NOT_SOL",
213    ``((k <= LENGTH search - LENGTH pat)
214     /\ (j < LENGTH pat)
215     /\ (EL j pat <> EL (k+j) search)
216     /\ (EL (k+j) search = EL a all_chars)
217     )
218     ==> (!d. d < MIN_SET (valid_cha_shifts pat all_chars j a)
219              ==> ~((k+d) IN solutions pat search))``,
220    DISCH_TAC
221    >> fs[]
222    >> rw[]
223    >> `d NOTIN valid_cha_shifts pat all_chars j a`
224            by metis_tac[CHA_SHIFT_EXISTS_THM, MIN_SET_LEM,
225                            DECIDE ``a < b /\ b <= a ==> F``]
226    >> `d < j + 1`
227            by (irule MIN_SET_IS_LOWER_BOUND
228                >> rpt conj_tac
229                >> qexists_tac `valid_cha_shifts pat all_chars j a`
230                >> rw[valid_cha_shifts_def])
231    >> fs[valid_cha_shifts_def]
232    >- (`d + k = k` by fs[]
233        >> `k NOTIN solutions pat search`
234                suffices_by rw[]
235        >> rw[solutions_def]
236        >> `EL (j + k) search <> EL j pat`
237                by metis_tac[EQ_SYM_EQ]
238        >> metis_tac[])
239    >- (rw[solutions_def]
240        >> DISJ2_TAC
241        >> qexists_tac `j - d`
242        >> simp[ADD_CLAUSES])
243    );
244
245
246(* -- SUFFIX MATCH SHIFTS -- *)
247(* Formal Definition of Valid Suffix Shifts *)
248val valid_suf_shifts_def =
249    Define
250    `
251    valid_suf_shifts pat j  =
252        {d | 1 <= d /\ d <= LENGTH pat
253            /\ (!i. (MAX (j+1) d <= i) /\ (i <= LENGTH pat - 1)
254                    ==> (EL (i-d) pat = EL i pat))
255            /\ ((d >= j+1) \/ (EL (j-d) pat <> EL j pat))
256        }
257    `;
258
259(* Confirmation that a valid suffix shift exists in correct circumstances *)
260val SUF_SHIFT_EXISTS_THM = store_thm(
261    "SUF_SHIFT_EXISTS_THM",
262    ``j < LENGTH pat ==> valid_suf_shifts pat j <> {}``,
263    rw[valid_suf_shifts_def]
264    >> Cases_on `pat`
265    >- fs[]
266    >-(simp[EXTENSION]
267       >> qexists_tac `SUC (LENGTH t)`
268       >> simp[]
269       >> fs[])
270    );
271
272(* Confirmation that skipped shifts not in valid_suf_shifts give
273   invalid alignments *)
274val SUF_SKIP_NOT_SOL = store_thm(
275    "SUF_SKIP_NOT_SOL",
276    ``((k <= LENGTH search - LENGTH pat) /\
277      (j < LENGTH pat)
278    /\ (!i. (j<i /\ i < LENGTH pat)
279           ==> (EL i pat = EL (k+i) search))
280    /\ (EL j pat <> EL (k+j) search)
281    )
282    ==> (!d. d < MIN_SET (valid_suf_shifts pat j)
283           ==> ~((k+d) IN solutions pat search)
284        )
285    ``,
286    rw[solutions_def]
287    >> Cases_on `d = 0`
288    >- (simp[] >> metis_tac[])
289    >- (`d > 0`
290            by rw[NOT_ZERO_LT_ZERO]
291        >> Cases_on `(d < j+1) /\ (EL (j- d) pat = EL j pat)`
292        >- (qabbrev_tac `q = j - d`
293            >> `q < LENGTH pat`
294                    by simp[ADD_CLAUSES, Abbr `q`]
295            >> `EL (q + d) pat <> EL (d + (q + k)) search`
296                    by rw[Abbr `q`]
297            >> `EL q pat = EL (q + d) pat`
298                    by rw[Abbr `q`]
299            >> Cases_on `d + k <= LENGTH search - LENGTH pat`
300            >> fs[]
301            >> `EL (d + (k + q)) search = EL (d + (q + k)) search`
302                    by rw[ADD_CLAUSES]
303            >> `EL q pat <> EL (d + (q + k)) search`
304                    by metis_tac[]
305            >> qexists_tac `q`
306            >> rw[])
307        >- (`d NOTIN valid_suf_shifts pat j`
308                by metis_tac[SUF_SHIFT_EXISTS_THM,
309                             MIN_SET_LEM,
310                            DECIDE ``a < b /\ b <= a
311                                     ==> F``]
312            >> pop_assum mp_tac
313            >> simp[valid_suf_shifts_def]
314            >> reverse (rw [])
315            >- fs[]
316            >- (DISJ2_TAC
317                >> qexists_tac `i - d`
318                >> simp[])
319            >-(`MIN_SET (valid_suf_shifts pat j) <= LENGTH pat`
320                    suffices_by simp[]
321                >> irule MIN_SET_UPPER_BOUND
322                >> rpt conj_tac
323                >- rw[valid_suf_shifts_def]
324                >- simp[SUF_SHIFT_EXISTS_THM])
325            )
326        )
327    );
328
329val _ = export_theory();