1(* Author: Tobias Nipkow, Florian Haftmann, TU Muenchen *)
2
3section \<open>Character and string types\<close>
4
5theory String
6imports Enum
7begin
8
9subsection \<open>Strings as list of bytes\<close>
10
11text \<open>
12  When modelling strings, we follow the approach given
13  in \<^url>\<open>https://utf8everywhere.org/\<close>:
14
15  \<^item> Strings are a list of bytes (8 bit).
16
17  \<^item> Byte values from 0 to 127 are US-ASCII.
18
19  \<^item> Byte values from 128 to 255 are uninterpreted blobs.
20\<close>
21
22subsubsection \<open>Bytes as datatype\<close>
23
24context unique_euclidean_semiring_with_bit_shifts
25begin
26
27lemma bit_horner_sum_iff:
28  \<open>bit (foldr (\<lambda>b k. of_bool b + k * 2) bs 0) n \<longleftrightarrow> n < length bs \<and> bs ! n\<close>
29proof (induction bs arbitrary: n)
30  case Nil
31  then show ?case
32    by simp
33next
34  case (Cons b bs)
35  show ?case
36  proof (cases n)
37    case 0
38    then show ?thesis
39      by simp
40  next
41    case (Suc m)
42    with bit_rec [of _ n] Cons.prems Cons.IH [of m]
43    show ?thesis by simp
44  qed
45qed
46
47lemma take_bit_horner_sum_eq:
48  \<open>take_bit n (foldr (\<lambda>b k. of_bool b + k * 2) bs 0) = foldr (\<lambda>b k. of_bool b + k * 2) (take n bs) 0\<close>
49proof (induction bs arbitrary: n)
50  case Nil
51  then show ?case
52    by simp
53next
54  case (Cons b bs)
55  show ?case
56  proof (cases n)
57    case 0
58    then show ?thesis
59      by simp
60  next
61    case (Suc m)
62    with take_bit_rec [of n] Cons.prems Cons.IH [of m]
63    show ?thesis by (simp add: ac_simps)
64  qed
65qed
66
67lemma (in semiring_bit_shifts) take_bit_eq_horner_sum:
68  \<open>take_bit n a = foldr (\<lambda>b k. of_bool b + k * 2) (map (bit a) [0..<n]) 0\<close>
69proof (induction a arbitrary: n rule: bits_induct)
70  case (stable a)
71  have *: \<open>((\<lambda>k. k * 2) ^^ n) 0 = 0\<close>
72    by (induction n) simp_all
73  from stable have \<open>bit a = (\<lambda>_. odd a)\<close>
74    by (simp add: stable_imp_bit_iff_odd fun_eq_iff)
75  then have \<open>map (bit a) [0..<n] = replicate n (odd a)\<close>
76    by (simp add: map_replicate_const)
77  with stable show ?case
78    by (simp add: stable_imp_take_bit_eq mask_eq_seq_sum *)
79next
80  case (rec a b)
81  show ?case
82  proof (cases n)
83    case 0
84    then show ?thesis
85      by simp
86  next
87    case (Suc m)
88    have \<open>map (bit (of_bool b + 2 * a)) [0..<Suc m] = b # map (bit (of_bool b + 2 * a)) [Suc 0..<Suc m]\<close>
89      by (simp only: upt_conv_Cons) simp
90    also have \<open>\<dots> = b # map (bit a) [0..<m]\<close>
91      by (simp only: flip: map_Suc_upt) (simp add: bit_Suc rec.hyps)
92    finally show ?thesis
93      using Suc rec.IH [of m] by (simp add: take_bit_Suc rec.hyps, simp add: ac_simps)
94  qed
95qed
96
97end
98
99datatype char =
100  Char (digit0: bool) (digit1: bool) (digit2: bool) (digit3: bool)
101       (digit4: bool) (digit5: bool) (digit6: bool) (digit7: bool)
102
103context comm_semiring_1
104begin
105
106definition of_char :: \<open>char \<Rightarrow> 'a\<close>
107  where \<open>of_char c = foldr (\<lambda>b k. of_bool b + k * 2) 
108    [digit0 c, digit1 c, digit2 c, digit3 c, digit4 c, digit5 c, digit6 c, digit7 c] 0\<close>
109
110lemma of_char_Char [simp]:
111  \<open>of_char (Char b0 b1 b2 b3 b4 b5 b6 b7) =
112    foldr (\<lambda>b k. of_bool b + k * 2) [b0, b1, b2, b3, b4, b5, b6, b7] 0\<close>
113  by (simp add: of_char_def)
114
115end
116
117context unique_euclidean_semiring_with_bit_shifts
118begin
119
120definition char_of :: \<open>'a \<Rightarrow> char\<close>
121  where \<open>char_of n = Char (odd n) (bit n 1) (bit n 2) (bit n 3) (bit n 4) (bit n 5) (bit n 6) (bit n 7)\<close>
122
123lemma char_of_take_bit_eq:
124  \<open>char_of (take_bit n m) = char_of m\<close> if \<open>n \<ge> 8\<close>
125  using that by (simp add: char_of_def bit_take_bit_iff)
126
127lemma char_of_char [simp]:
128  \<open>char_of (of_char c) = c\<close>
129  by (simp only: of_char_def char_of_def bit_horner_sum_iff) simp
130
131lemma char_of_comp_of_char [simp]:
132  "char_of \<circ> of_char = id"
133  by (simp add: fun_eq_iff)
134
135lemma inj_of_char:
136  \<open>inj of_char\<close>
137proof (rule injI)
138  fix c d
139  assume "of_char c = of_char d"
140  then have "char_of (of_char c) = char_of (of_char d)"
141    by simp
142  then show "c = d"
143    by simp
144qed
145
146lemma of_char_eqI:
147  \<open>c = d\<close> if \<open>of_char c = of_char d\<close>
148  using that inj_of_char by (simp add: inj_eq)
149
150lemma of_char_eq_iff [simp]:
151  \<open>of_char c = of_char d \<longleftrightarrow> c = d\<close>
152  by (auto intro: of_char_eqI)
153
154lemma of_char_of [simp]:
155  \<open>of_char (char_of a) = a mod 256\<close>
156proof -
157  have \<open>[0..<8] = [0, Suc 0, 2, 3, 4, 5, 6, 7 :: nat]\<close>
158    by (simp add: upt_eq_Cons_conv)
159  then have \<open>[odd a, bit a 1, bit a 2, bit a 3, bit a 4, bit a 5, bit a 6, bit a 7] = map (bit a) [0..<8]\<close>
160    by simp
161  then have \<open>of_char (char_of a) = take_bit 8 a\<close>
162    by (simp only: char_of_def of_char_def char.sel take_bit_eq_horner_sum)
163  then show ?thesis
164    by (simp add: take_bit_eq_mod)
165qed
166
167lemma char_of_mod_256 [simp]:
168  \<open>char_of (n mod 256) = char_of n\<close>
169  by (rule of_char_eqI) simp
170
171lemma of_char_mod_256 [simp]:
172  \<open>of_char c mod 256 = of_char c\<close>
173proof -
174  have \<open>of_char (char_of (of_char c)) mod 256 = of_char (char_of (of_char c))\<close>
175    by (simp only: of_char_of) simp
176  then show ?thesis
177    by simp
178qed
179
180lemma char_of_quasi_inj [simp]:
181  \<open>char_of m = char_of n \<longleftrightarrow> m mod 256 = n mod 256\<close> (is \<open>?P \<longleftrightarrow> ?Q\<close>)
182proof
183  assume ?Q
184  then show ?P
185    by (auto intro: of_char_eqI)
186next
187  assume ?P
188  then have \<open>of_char (char_of m) = of_char (char_of n)\<close>
189    by simp
190  then show ?Q
191    by simp
192qed
193
194lemma char_of_eq_iff:
195  \<open>char_of n = c \<longleftrightarrow> take_bit 8 n = of_char c\<close>
196  by (auto intro: of_char_eqI simp add: take_bit_eq_mod)
197
198lemma char_of_nat [simp]:
199  \<open>char_of (of_nat n) = char_of n\<close>
200  by (simp add: char_of_def String.char_of_def drop_bit_of_nat)
201
202end
203
204lemma inj_on_char_of_nat [simp]:
205  "inj_on char_of {0::nat..<256}"
206  by (rule inj_onI) simp
207
208lemma nat_of_char_less_256 [simp]:
209  "of_char c < (256 :: nat)"
210proof -
211  have "of_char c mod (256 :: nat) < 256"
212    by arith
213  then show ?thesis by simp
214qed
215
216lemma range_nat_of_char:
217  "range of_char = {0::nat..<256}"
218proof (rule; rule)
219  fix n :: nat
220  assume "n \<in> range of_char"
221  then show "n \<in> {0..<256}"
222    by auto
223next
224  fix n :: nat
225  assume "n \<in> {0..<256}"
226  then have "n = of_char (char_of n)"
227    by simp
228  then show "n \<in> range of_char"
229    by (rule range_eqI)
230qed
231
232lemma UNIV_char_of_nat:
233  "UNIV = char_of ` {0::nat..<256}"
234proof -
235  have "range (of_char :: char \<Rightarrow> nat) = of_char ` char_of ` {0::nat..<256}"
236    by (auto simp add: range_nat_of_char intro!: image_eqI)
237  with inj_of_char [where ?'a = nat] show ?thesis 
238    by (simp add: inj_image_eq_iff)
239qed
240
241lemma card_UNIV_char:
242  "card (UNIV :: char set) = 256"
243  by (auto simp add: UNIV_char_of_nat card_image)
244
245context
246  includes lifting_syntax integer.lifting natural.lifting
247begin
248
249lemma [transfer_rule]:
250  \<open>(pcr_integer ===> (=)) char_of char_of\<close>
251  by (unfold char_of_def) transfer_prover
252
253lemma [transfer_rule]:
254  \<open>((=) ===> pcr_integer) of_char of_char\<close>
255  by (unfold of_char_def) transfer_prover
256
257lemma [transfer_rule]:
258  \<open>(pcr_natural ===> (=)) char_of char_of\<close>
259  by (unfold char_of_def) transfer_prover
260
261lemma [transfer_rule]:
262  \<open>((=) ===> pcr_natural) of_char of_char\<close>
263  by (unfold of_char_def) transfer_prover
264
265end
266
267lifting_update integer.lifting
268lifting_forget integer.lifting
269
270lifting_update natural.lifting
271lifting_forget natural.lifting
272
273syntax
274  "_Char" :: "str_position \<Rightarrow> char"    ("CHR _")
275  "_Char_ord" :: "num_const \<Rightarrow> char"   ("CHR _")
276
277type_synonym string = "char list"
278
279syntax
280  "_String" :: "str_position \<Rightarrow> string"    ("_")
281
282ML_file \<open>Tools/string_syntax.ML\<close>
283
284instantiation char :: enum
285begin
286
287definition
288  "Enum.enum = [
289    CHR 0x00, CHR 0x01, CHR 0x02, CHR 0x03,
290    CHR 0x04, CHR 0x05, CHR 0x06, CHR 0x07,
291    CHR 0x08, CHR 0x09, CHR ''\<newline>'', CHR 0x0B,
292    CHR 0x0C, CHR 0x0D, CHR 0x0E, CHR 0x0F,
293    CHR 0x10, CHR 0x11, CHR 0x12, CHR 0x13,
294    CHR 0x14, CHR 0x15, CHR 0x16, CHR 0x17,
295    CHR 0x18, CHR 0x19, CHR 0x1A, CHR 0x1B,
296    CHR 0x1C, CHR 0x1D, CHR 0x1E, CHR 0x1F,
297    CHR '' '', CHR ''!'', CHR 0x22, CHR ''#'',
298    CHR ''$'', CHR ''%'', CHR ''&'', CHR 0x27,
299    CHR ''('', CHR '')'', CHR ''*'', CHR ''+'',
300    CHR '','', CHR ''-'', CHR ''.'', CHR ''/'',
301    CHR ''0'', CHR ''1'', CHR ''2'', CHR ''3'',
302    CHR ''4'', CHR ''5'', CHR ''6'', CHR ''7'',
303    CHR ''8'', CHR ''9'', CHR '':'', CHR '';'',
304    CHR ''<'', CHR ''='', CHR ''>'', CHR ''?'',
305    CHR ''@'', CHR ''A'', CHR ''B'', CHR ''C'',
306    CHR ''D'', CHR ''E'', CHR ''F'', CHR ''G'',
307    CHR ''H'', CHR ''I'', CHR ''J'', CHR ''K'',
308    CHR ''L'', CHR ''M'', CHR ''N'', CHR ''O'',
309    CHR ''P'', CHR ''Q'', CHR ''R'', CHR ''S'',
310    CHR ''T'', CHR ''U'', CHR ''V'', CHR ''W'',
311    CHR ''X'', CHR ''Y'', CHR ''Z'', CHR ''['',
312    CHR 0x5C, CHR '']'', CHR ''^'', CHR ''_'',
313    CHR 0x60, CHR ''a'', CHR ''b'', CHR ''c'',
314    CHR ''d'', CHR ''e'', CHR ''f'', CHR ''g'',
315    CHR ''h'', CHR ''i'', CHR ''j'', CHR ''k'',
316    CHR ''l'', CHR ''m'', CHR ''n'', CHR ''o'',
317    CHR ''p'', CHR ''q'', CHR ''r'', CHR ''s'',
318    CHR ''t'', CHR ''u'', CHR ''v'', CHR ''w'',
319    CHR ''x'', CHR ''y'', CHR ''z'', CHR ''{'',
320    CHR ''|'', CHR ''}'', CHR ''~'', CHR 0x7F,
321    CHR 0x80, CHR 0x81, CHR 0x82, CHR 0x83,
322    CHR 0x84, CHR 0x85, CHR 0x86, CHR 0x87,
323    CHR 0x88, CHR 0x89, CHR 0x8A, CHR 0x8B,
324    CHR 0x8C, CHR 0x8D, CHR 0x8E, CHR 0x8F,
325    CHR 0x90, CHR 0x91, CHR 0x92, CHR 0x93,
326    CHR 0x94, CHR 0x95, CHR 0x96, CHR 0x97,
327    CHR 0x98, CHR 0x99, CHR 0x9A, CHR 0x9B,
328    CHR 0x9C, CHR 0x9D, CHR 0x9E, CHR 0x9F,
329    CHR 0xA0, CHR 0xA1, CHR 0xA2, CHR 0xA3,
330    CHR 0xA4, CHR 0xA5, CHR 0xA6, CHR 0xA7,
331    CHR 0xA8, CHR 0xA9, CHR 0xAA, CHR 0xAB,
332    CHR 0xAC, CHR 0xAD, CHR 0xAE, CHR 0xAF,
333    CHR 0xB0, CHR 0xB1, CHR 0xB2, CHR 0xB3,
334    CHR 0xB4, CHR 0xB5, CHR 0xB6, CHR 0xB7,
335    CHR 0xB8, CHR 0xB9, CHR 0xBA, CHR 0xBB,
336    CHR 0xBC, CHR 0xBD, CHR 0xBE, CHR 0xBF,
337    CHR 0xC0, CHR 0xC1, CHR 0xC2, CHR 0xC3,
338    CHR 0xC4, CHR 0xC5, CHR 0xC6, CHR 0xC7,
339    CHR 0xC8, CHR 0xC9, CHR 0xCA, CHR 0xCB,
340    CHR 0xCC, CHR 0xCD, CHR 0xCE, CHR 0xCF,
341    CHR 0xD0, CHR 0xD1, CHR 0xD2, CHR 0xD3,
342    CHR 0xD4, CHR 0xD5, CHR 0xD6, CHR 0xD7,
343    CHR 0xD8, CHR 0xD9, CHR 0xDA, CHR 0xDB,
344    CHR 0xDC, CHR 0xDD, CHR 0xDE, CHR 0xDF,
345    CHR 0xE0, CHR 0xE1, CHR 0xE2, CHR 0xE3,
346    CHR 0xE4, CHR 0xE5, CHR 0xE6, CHR 0xE7,
347    CHR 0xE8, CHR 0xE9, CHR 0xEA, CHR 0xEB,
348    CHR 0xEC, CHR 0xED, CHR 0xEE, CHR 0xEF,
349    CHR 0xF0, CHR 0xF1, CHR 0xF2, CHR 0xF3,
350    CHR 0xF4, CHR 0xF5, CHR 0xF6, CHR 0xF7,
351    CHR 0xF8, CHR 0xF9, CHR 0xFA, CHR 0xFB,
352    CHR 0xFC, CHR 0xFD, CHR 0xFE, CHR 0xFF]"
353
354definition
355  "Enum.enum_all P \<longleftrightarrow> list_all P (Enum.enum :: char list)"
356
357definition
358  "Enum.enum_ex P \<longleftrightarrow> list_ex P (Enum.enum :: char list)"
359
360lemma enum_char_unfold:
361  "Enum.enum = map char_of [0..<256]"
362proof -
363  have "map (of_char :: char \<Rightarrow> nat) Enum.enum = [0..<256]"
364    by (simp add: enum_char_def of_char_def upt_conv_Cons_Cons numeral_2_eq_2 [symmetric])
365  then have "map char_of (map (of_char :: char \<Rightarrow> nat) Enum.enum) =
366    map char_of [0..<256]"
367    by simp
368  then show ?thesis
369    by simp
370qed
371
372instance proof
373  show UNIV: "UNIV = set (Enum.enum :: char list)"
374    by (simp add: enum_char_unfold UNIV_char_of_nat atLeast0LessThan)
375  show "distinct (Enum.enum :: char list)"
376    by (auto simp add: enum_char_unfold distinct_map intro: inj_onI)
377  show "\<And>P. Enum.enum_all P \<longleftrightarrow> Ball (UNIV :: char set) P"
378    by (simp add: UNIV enum_all_char_def list_all_iff)
379  show "\<And>P. Enum.enum_ex P \<longleftrightarrow> Bex (UNIV :: char set) P"
380    by (simp add: UNIV enum_ex_char_def list_ex_iff)
381qed
382
383end
384
385lemma linorder_char:
386  "class.linorder (\<lambda>c d. of_char c \<le> (of_char d :: nat)) (\<lambda>c d. of_char c < (of_char d :: nat))"
387  by standard auto
388
389text \<open>Optimized version for execution\<close>
390
391definition char_of_integer :: "integer \<Rightarrow> char"
392  where [code_abbrev]: "char_of_integer = char_of"
393
394definition integer_of_char :: "char \<Rightarrow> integer"
395  where [code_abbrev]: "integer_of_char = of_char"
396
397lemma char_of_integer_code [code]:
398  "char_of_integer k = (let
399     (q0, b0) = bit_cut_integer k;
400     (q1, b1) = bit_cut_integer q0;
401     (q2, b2) = bit_cut_integer q1;
402     (q3, b3) = bit_cut_integer q2;
403     (q4, b4) = bit_cut_integer q3;
404     (q5, b5) = bit_cut_integer q4;
405     (q6, b6) = bit_cut_integer q5;
406     (_, b7) = bit_cut_integer q6
407    in Char b0 b1 b2 b3 b4 b5 b6 b7)"
408  by (simp add: bit_cut_integer_def char_of_integer_def char_of_def div_mult2_numeral_eq bit_iff_odd_drop_bit drop_bit_eq_div)
409
410lemma integer_of_char_code [code]:
411  "integer_of_char (Char b0 b1 b2 b3 b4 b5 b6 b7) =
412    ((((((of_bool b7 * 2 + of_bool b6) * 2 +
413      of_bool b5) * 2 + of_bool b4) * 2 +
414        of_bool b3) * 2 + of_bool b2) * 2 +
415          of_bool b1) * 2 + of_bool b0"
416  by (simp add: integer_of_char_def of_char_def)
417
418
419subsection \<open>Strings as dedicated type for target language code generation\<close>
420
421subsubsection \<open>Logical specification\<close>
422
423context
424begin
425
426qualified definition ascii_of :: "char \<Rightarrow> char"
427  where "ascii_of c = Char (digit0 c) (digit1 c) (digit2 c) (digit3 c) (digit4 c) (digit5 c) (digit6 c) False"
428
429qualified lemma ascii_of_Char [simp]:
430  "ascii_of (Char b0 b1 b2 b3 b4 b5 b6 b7) = Char b0 b1 b2 b3 b4 b5 b6 False"
431  by (simp add: ascii_of_def)
432
433qualified lemma not_digit7_ascii_of [simp]:
434  "\<not> digit7 (ascii_of c)"
435  by (simp add: ascii_of_def)
436
437qualified lemma ascii_of_idem:
438  "ascii_of c = c" if "\<not> digit7 c"
439  using that by (cases c) simp
440
441qualified lemma char_of_ascii_of [simp]:
442  "of_char (ascii_of c) = take_bit 7 (of_char c :: nat)"
443  by (cases c) (simp only: ascii_of_Char of_char_Char take_bit_horner_sum_eq, simp)
444
445qualified typedef literal = "{cs. \<forall>c\<in>set cs. \<not> digit7 c}"
446  morphisms explode Abs_literal
447proof
448  show "[] \<in> {cs. \<forall>c\<in>set cs. \<not> digit7 c}"
449    by simp
450qed
451
452qualified setup_lifting type_definition_literal
453
454qualified lift_definition implode :: "string \<Rightarrow> literal"
455  is "map ascii_of"
456  by auto
457
458qualified lemma implode_explode_eq [simp]:
459  "String.implode (String.explode s) = s"
460proof transfer
461  fix cs
462  show "map ascii_of cs = cs" if "\<forall>c\<in>set cs. \<not> digit7 c"
463    using that
464      by (induction cs) (simp_all add: ascii_of_idem)
465qed
466
467qualified lemma explode_implode_eq [simp]:
468  "String.explode (String.implode cs) = map ascii_of cs"
469  by transfer rule
470
471end
472
473
474subsubsection \<open>Syntactic representation\<close>
475
476text \<open>
477  Logical ground representations for literals are:
478
479  \<^enum> \<open>0\<close> for the empty literal;
480
481  \<^enum> \<open>Literal b0 \<dots> b6 s\<close> for a literal starting with one
482    character and continued by another literal.
483
484  Syntactic representations for literals are:
485
486  \<^enum> Printable text as string prefixed with \<open>STR\<close>;
487
488  \<^enum> A single ascii value as numerical hexadecimal value prefixed with \<open>STR\<close>.
489\<close>
490
491instantiation String.literal :: zero
492begin
493
494context
495begin
496
497qualified lift_definition zero_literal :: String.literal
498  is Nil
499  by simp
500
501instance ..
502
503end
504
505end
506
507context
508begin
509
510qualified abbreviation (output) empty_literal :: String.literal
511  where "empty_literal \<equiv> 0"
512
513qualified lift_definition Literal :: "bool \<Rightarrow> bool \<Rightarrow> bool \<Rightarrow> bool \<Rightarrow> bool \<Rightarrow> bool \<Rightarrow> bool \<Rightarrow> String.literal \<Rightarrow> String.literal"
514  is "\<lambda>b0 b1 b2 b3 b4 b5 b6 cs. Char b0 b1 b2 b3 b4 b5 b6 False # cs"
515  by auto
516
517qualified lemma Literal_eq_iff [simp]:
518  "Literal b0 b1 b2 b3 b4 b5 b6 s = Literal c0 c1 c2 c3 c4 c5 c6 t
519     \<longleftrightarrow> (b0 \<longleftrightarrow> c0) \<and> (b1 \<longleftrightarrow> c1) \<and> (b2 \<longleftrightarrow> c2) \<and> (b3 \<longleftrightarrow> c3)
520         \<and> (b4 \<longleftrightarrow> c4) \<and> (b5 \<longleftrightarrow> c5) \<and> (b6 \<longleftrightarrow> c6) \<and> s = t"
521  by transfer simp
522
523qualified lemma empty_neq_Literal [simp]:
524  "empty_literal \<noteq> Literal b0 b1 b2 b3 b4 b5 b6 s"
525  by transfer simp
526
527qualified lemma Literal_neq_empty [simp]:
528  "Literal b0 b1 b2 b3 b4 b5 b6 s \<noteq> empty_literal"
529  by transfer simp
530
531end
532
533code_datatype "0 :: String.literal" String.Literal
534
535syntax
536  "_Literal" :: "str_position \<Rightarrow> String.literal"   ("STR _")
537  "_Ascii" :: "num_const \<Rightarrow> String.literal"        ("STR _")
538
539ML_file \<open>Tools/literal.ML\<close>
540
541
542subsubsection \<open>Operations\<close>
543
544instantiation String.literal :: plus
545begin
546
547context
548begin
549
550qualified lift_definition plus_literal :: "String.literal \<Rightarrow> String.literal \<Rightarrow> String.literal"
551  is "(@)"
552  by auto
553
554instance ..
555
556end
557
558end
559
560instance String.literal :: monoid_add
561  by (standard; transfer) simp_all
562
563instantiation String.literal :: size
564begin
565
566context
567  includes literal.lifting
568begin
569
570lift_definition size_literal :: "String.literal \<Rightarrow> nat"
571  is length .
572
573end
574
575instance ..
576
577end
578
579instantiation String.literal :: equal
580begin
581
582context
583begin
584
585qualified lift_definition equal_literal :: "String.literal \<Rightarrow> String.literal \<Rightarrow> bool"
586  is HOL.equal .
587
588instance
589  by (standard; transfer) (simp add: equal)
590
591end
592
593end
594
595instantiation String.literal :: linorder
596begin
597
598context
599begin
600
601qualified lift_definition less_eq_literal :: "String.literal \<Rightarrow> String.literal \<Rightarrow> bool"
602  is "ord.lexordp_eq (\<lambda>c d. of_char c < (of_char d :: nat))"
603  .
604
605qualified lift_definition less_literal :: "String.literal \<Rightarrow> String.literal \<Rightarrow> bool"
606  is "ord.lexordp (\<lambda>c d. of_char c < (of_char d :: nat))"
607  .
608
609instance proof -
610  from linorder_char interpret linorder "ord.lexordp_eq (\<lambda>c d. of_char c < (of_char d :: nat))"
611    "ord.lexordp (\<lambda>c d. of_char c < (of_char d :: nat)) :: string \<Rightarrow> string \<Rightarrow> bool"
612    by (rule linorder.lexordp_linorder)
613  show "PROP ?thesis"
614    by (standard; transfer) (simp_all add: less_le_not_le linear)
615qed
616
617end
618
619end
620
621lemma infinite_literal:
622  "infinite (UNIV :: String.literal set)"
623proof -
624  define S where "S = range (\<lambda>n. replicate n CHR ''A'')"
625  have "inj_on String.implode S"
626  proof (rule inj_onI)
627    fix cs ds
628    assume "String.implode cs = String.implode ds"
629    then have "String.explode (String.implode cs) = String.explode (String.implode ds)"
630      by simp
631    moreover assume "cs \<in> S" and "ds \<in> S"
632    ultimately show "cs = ds"
633      by (auto simp add: S_def)
634  qed
635  moreover have "infinite S"
636    by (auto simp add: S_def dest: finite_range_imageI [of _ length])
637  ultimately have "infinite (String.implode ` S)"
638    by (simp add: finite_image_iff)
639  then show ?thesis
640    by (auto intro: finite_subset)
641qed
642
643
644subsubsection \<open>Executable conversions\<close>
645
646context
647begin
648
649qualified lift_definition asciis_of_literal :: "String.literal \<Rightarrow> integer list"
650  is "map of_char"
651  .
652
653qualified lemma asciis_of_zero [simp, code]:
654  "asciis_of_literal 0 = []"
655  by transfer simp
656
657qualified lemma asciis_of_Literal [simp, code]:
658  "asciis_of_literal (String.Literal b0 b1 b2 b3 b4 b5 b6 s) =
659    of_char (Char b0 b1 b2 b3 b4 b5 b6 False) # asciis_of_literal s "
660  by transfer simp
661
662qualified lift_definition literal_of_asciis :: "integer list \<Rightarrow> String.literal"
663  is "map (String.ascii_of \<circ> char_of)"
664  by auto
665
666qualified lemma literal_of_asciis_Nil [simp, code]:
667  "literal_of_asciis [] = 0"
668  by transfer simp
669
670qualified lemma literal_of_asciis_Cons [simp, code]:
671  "literal_of_asciis (k # ks) = (case char_of k
672    of Char b0 b1 b2 b3 b4 b5 b6 b7 \<Rightarrow> String.Literal b0 b1 b2 b3 b4 b5 b6 (literal_of_asciis ks))"
673  by (simp add: char_of_def) (transfer, simp add: char_of_def)
674
675qualified lemma literal_of_asciis_of_literal [simp]:
676  "literal_of_asciis (asciis_of_literal s) = s"
677proof transfer
678  fix cs
679  assume "\<forall>c\<in>set cs. \<not> digit7 c"
680  then show "map (String.ascii_of \<circ> char_of) (map of_char cs) = cs"
681    by (induction cs) (simp_all add: String.ascii_of_idem) 
682qed
683
684qualified lemma explode_code [code]:
685  "String.explode s = map char_of (asciis_of_literal s)"
686  by transfer simp
687
688qualified lemma implode_code [code]:
689  "String.implode cs = literal_of_asciis (map of_char cs)"
690  by transfer simp
691
692qualified lemma equal_literal [code]:
693  "HOL.equal (String.Literal b0 b1 b2 b3 b4 b5 b6 s)
694    (String.Literal a0 a1 a2 a3 a4 a5 a6 r)
695    \<longleftrightarrow> (b0 \<longleftrightarrow> a0) \<and> (b1 \<longleftrightarrow> a1) \<and> (b2 \<longleftrightarrow> a2) \<and> (b3 \<longleftrightarrow> a3)
696      \<and> (b4 \<longleftrightarrow> a4) \<and> (b5 \<longleftrightarrow> a5) \<and> (b6 \<longleftrightarrow> a6) \<and> (s = r)"
697  by (simp add: equal)
698
699end
700
701
702subsubsection \<open>Technical code generation setup\<close>
703
704text \<open>Alternative constructor for generated computations\<close>
705
706context
707begin  
708
709qualified definition Literal' :: "bool \<Rightarrow> bool \<Rightarrow> bool \<Rightarrow> bool \<Rightarrow> bool \<Rightarrow> bool \<Rightarrow> bool \<Rightarrow> String.literal \<Rightarrow> String.literal"
710  where [simp]: "Literal' = String.Literal"
711
712lemma [code]:
713  \<open>Literal' b0 b1 b2 b3 b4 b5 b6 s = String.literal_of_asciis
714    [foldr (\<lambda>b k. of_bool b + k * 2) [b0, b1, b2, b3, b4, b5, b6] 0] + s\<close>
715proof -
716  have \<open>foldr (\<lambda>b k. of_bool b + k * 2) [b0, b1, b2, b3, b4, b5, b6] 0 = of_char (Char b0 b1 b2 b3 b4 b5 b6 False)\<close>
717    by simp
718  moreover have \<open>Literal' b0 b1 b2 b3 b4 b5 b6 s = String.literal_of_asciis
719    [of_char (Char b0 b1 b2 b3 b4 b5 b6 False)] + s\<close>
720    by (unfold Literal'_def) (transfer, simp only: list.simps comp_apply char_of_char, simp)
721  ultimately show ?thesis
722    by simp
723qed
724
725lemma [code_computation_unfold]:
726  "String.Literal = Literal'"
727  by simp
728
729end
730
731code_reserved SML string String Char List
732code_reserved OCaml string String Char List
733code_reserved Haskell Prelude
734code_reserved Scala string
735
736code_printing
737  type_constructor String.literal \<rightharpoonup>
738    (SML) "string"
739    and (OCaml) "string"
740    and (Haskell) "String"
741    and (Scala) "String"
742| constant "STR ''''" \<rightharpoonup>
743    (SML) "\"\""
744    and (OCaml) "\"\""
745    and (Haskell) "\"\""
746    and (Scala) "\"\""
747
748setup \<open>
749  fold Literal.add_code ["SML", "OCaml", "Haskell", "Scala"]
750\<close>
751
752code_printing
753  constant "(+) :: String.literal \<Rightarrow> String.literal \<Rightarrow> String.literal" \<rightharpoonup>
754    (SML) infixl 18 "^"
755    and (OCaml) infixr 6 "^"
756    and (Haskell) infixr 5 "++"
757    and (Scala) infixl 7 "+"
758| constant String.literal_of_asciis \<rightharpoonup>
759    (SML) "!(String.implode/ o List.map (fn k => if 0 <= k andalso k < 128 then (Char.chr o IntInf.toInt) k else raise Fail \"Non-ASCII character in literal\"))"
760    and (OCaml) "!(let xs = _
761      and chr k =
762        let l = Z.to'_int k
763          in if 0 <= l && l < 128
764          then Char.chr l
765          else failwith \"Non-ASCII character in literal\"
766      in String.init (List.length xs) (List.nth (List.map chr xs)))"
767    and (Haskell) "map/ (let chr k | (0 <= k && k < 128) = Prelude.toEnum k :: Prelude.Char in chr . Prelude.fromInteger)"
768    and (Scala) "\"\"/ ++/ _.map((k: BigInt) => if (BigInt(0) <= k && k < BigInt(128)) k.charValue else sys.error(\"Non-ASCII character in literal\"))"
769| constant String.asciis_of_literal \<rightharpoonup>
770    (SML) "!(List.map (fn c => let val k = Char.ord c in if k < 128 then IntInf.fromInt k else raise Fail \"Non-ASCII character in literal\" end) /o String.explode)"
771    and (OCaml) "!(let s = _ in let rec exp i l = if i < 0 then l else exp (i - 1) (let k = Char.code (String.get s i) in
772      if k < 128 then Z.of'_int k :: l else failwith \"Non-ASCII character in literal\") in exp (String.length s - 1) [])"
773    and (Haskell) "map/ (let ord k | (k < 128) = Prelude.toInteger k in ord . (Prelude.fromEnum :: Prelude.Char -> Prelude.Int))"
774    and (Scala) "!(_.toList.map(c => { val k: Int = c.toInt; if (k < 128) BigInt(k) else sys.error(\"Non-ASCII character in literal\") }))"
775| class_instance String.literal :: equal \<rightharpoonup>
776    (Haskell) -
777| constant "HOL.equal :: String.literal \<Rightarrow> String.literal \<Rightarrow> bool" \<rightharpoonup>
778    (SML) "!((_ : string) = _)"
779    and (OCaml) "!((_ : string) = _)"
780    and (Haskell) infix 4 "=="
781    and (Scala) infixl 5 "=="
782| constant "(\<le>) :: String.literal \<Rightarrow> String.literal \<Rightarrow> bool" \<rightharpoonup>
783    (SML) "!((_ : string) <= _)"
784    and (OCaml) "!((_ : string) <= _)"
785    and (Haskell) infix 4 "<="
786    \<comment> \<open>Order operations for \<^typ>\<open>String.literal\<close> work in Haskell only
787          if no type class instance needs to be generated, because String = [Char] in Haskell
788          and \<^typ>\<open>char list\<close> need not have the same order as \<^typ>\<open>String.literal\<close>.\<close>
789    and (Scala) infixl 4 "<="
790    and (Eval) infixl 6 "<="
791| constant "(<) :: String.literal \<Rightarrow> String.literal \<Rightarrow> bool" \<rightharpoonup>
792    (SML) "!((_ : string) < _)"
793    and (OCaml) "!((_ : string) < _)"
794    and (Haskell) infix 4 "<"
795    and (Scala) infixl 4 "<"
796    and (Eval) infixl 6 "<"
797
798
799subsubsection \<open>Code generation utility\<close>
800
801setup \<open>Sign.map_naming (Name_Space.mandatory_path "Code")\<close>
802
803definition abort :: "String.literal \<Rightarrow> (unit \<Rightarrow> 'a) \<Rightarrow> 'a"
804  where [simp]: "abort _ f = f ()"
805
806declare [[code drop: Code.abort]]
807
808lemma abort_cong:
809  "msg = msg' \<Longrightarrow> Code.abort msg f = Code.abort msg' f"
810  by simp
811
812setup \<open>Sign.map_naming Name_Space.parent_path\<close>
813
814setup \<open>Code_Simp.map_ss (Simplifier.add_cong @{thm Code.abort_cong})\<close>
815
816code_printing
817  constant Code.abort \<rightharpoonup>
818    (SML) "!(raise/ Fail/ _)"
819    and (OCaml) "failwith"
820    and (Haskell) "!(error/ ::/ forall a./ String -> (() -> a) -> a)"
821    and (Scala) "!{/ sys.error((_));/ ((_)).apply(())/ }"
822
823
824subsubsection \<open>Finally\<close>
825
826lifting_update literal.lifting
827lifting_forget literal.lifting
828
829end
830