1(*===========================================================================*)
2(* Regexps for numbers and numeric intervals                                 *)
3(*===========================================================================*)
4
5structure Regexp_Numerics :> Regexp_Numerics =
6struct
7
8open Lib Feedback regexpMisc Regexp_Type Regexp_Match;
9
10val ERR = mk_HOL_ERR "Regexp_Numerics";
11
12type word8 = Word8.word;
13type bigint = IntInf.int
14
15fun copy x n = List.tabulate (n,K x) handle _ => [];
16fun padL list x width = copy x (width - length list) @ list;
17fun padR list x width = list @ copy x (width - length list);
18
19
20(*---------------------------------------------------------------------------*)
21(* Least- and Most- significant byte                                         *)
22(*---------------------------------------------------------------------------*)
23
24datatype endian = LSB | MSB
25
26fun compare_endian(LSB,MSB) = LESS
27  | compare_endian(MSB,LSB) = GREATER
28  | compare_endian other    = EQUAL
29
30fun endian2string MSB = "MSB"
31  | endian2string LSB = "LSB";
32
33fun string2endian "MSB" = SOME MSB
34  | string2endian "LSB" = SOME LSB
35  | string2endian other = NONE;
36
37datatype encoding = Unsigned | Twos_comp | Zigzag | Sign_mag ;
38
39(*---------------------------------------------------------------------------*)
40(* ZigZag encoding as an int -> posint map                                   *)
41(*---------------------------------------------------------------------------*)
42
43fun zigzag i = if i >= 0 then 2*i else (2 * IntInf.abs i) - 1;
44
45fun undo_zigzag n =
46  case IntInf.divMod(n,IntInf.fromInt 2)
47   of (k,0) => k
48    | (k,_) => ~(k+1);
49
50(*---------------------------------------------------------------------------*)
51(* Sign-magnitude encoding as an int -> posint map                           *)
52(*---------------------------------------------------------------------------*)
53
54fun sign_mag i = if i >= 0 then 2*i else (2 * IntInf.abs i) + 1;
55
56fun undo_sign_mag n =
57 if n = 1 then 0
58 else
59  case IntInf.divMod(n,IntInf.fromInt 2)
60   of (k,0) => k
61    | (k,_) => ~k;
62
63(*---------------------------------------------------------------------------*)
64(* Minimum bytes needed to express unsigned and signed quantities            *)
65(*---------------------------------------------------------------------------*)
66
67fun log256 n = IntInf.log2 n div 8;
68
69fun unsigned_width i =
70   if i = 0 then 1 else
71   if 0 < i then 1 + log256 i
72   else raise ERR "unsigned_width" "negative input";
73
74fun twos_comp_width i =
75 let val k = unsigned_width (IntInf.abs i)
76     val bound = twoE (k*8 - 1)
77 in
78    if ~bound <= i andalso i < bound then k else k+1
79 end;
80
81fun width_of Unsigned  = unsigned_width
82  | width_of Twos_comp = twos_comp_width
83  | width_of Zigzag    = unsigned_width o zigzag
84  | width_of Sign_mag  = unsigned_width o sign_mag;
85
86fun pair_max f = Int.max o (f##f)
87
88fun interval_width Unsigned  = pair_max unsigned_width
89  | interval_width Twos_comp = pair_max twos_comp_width
90  | interval_width Zigzag    = pair_max (width_of Zigzag)
91  | interval_width Sign_mag  = pair_max (width_of Sign_mag);
92
93(*---------------------------------------------------------------------------*)
94(* Twos complement encoding as an int -> posint map.                         *)
95(*---------------------------------------------------------------------------*)
96
97fun twos_comp w i =
98 let val top = twoE (8*w)
99     val bound = IntInf.div(top, IntInf.fromInt 2)
100 in if ~bound <= i andalso i < bound then
101       (if i >= 0 then i else top + i)
102    else raise ERR "twos_comp" "width too small"
103 end;
104
105fun undo_twos_comp w n =
106 let val top = twoE (8*w)
107     val bound = IntInf.div(top, IntInf.fromInt 2)
108 in if 0 <= n andalso n < top then
109      (if n < bound then n else ~(top - n))
110    else raise ERR "undo_twos_comp" "width too small"
111 end;
112
113(*---------------------------------------------------------------------------*)
114(* Basic maps between bytes, chars, and ints                                 *)
115(*---------------------------------------------------------------------------*)
116
117val byte2char = Char.chr o Word8.toInt;
118val char2byte = Word8.fromInt o Char.ord;
119
120fun ii2byte i = Word8.fromInt (IntInf.toInt i);
121fun byte2ii w = IntInf.fromInt(Word8.toInt w);
122
123val eight = Word.fromInt 8
124
125(*---------------------------------------------------------------------------*)
126(* Maps to and from byte lists                                               *)
127(*---------------------------------------------------------------------------*)
128
129val n2l =
130 let open IntInf
131     fun chunks n =
132       if n=0 then []
133       else toInt(op mod(n,256)) :: chunks (op div(n,256))
134 in
135   fn i:IntInf.int =>
136        if i < 0 then
137           raise ERR "n2l" "negative input"
138         else
139           chunks i
140 end;
141
142fun l2n [] = IntInf.fromInt 0
143  | l2n (h::t) = IntInf.fromInt h + 256 * l2n t;
144
145(*---------------------------------------------------------------------------*)
146(* bytes_of w i lays out i into w bytes in LSB                               *)
147(*                                                                           *)
148(* w should be large enough to capture i                                     *)
149(*---------------------------------------------------------------------------*)
150
151fun bytes_of w i =
152 let open IntInf
153 in if Int.<= (w,1) then
154      [ii2byte i]
155    else
156      ii2byte (andb(i,0xFF)) :: bytes_of (Int.-(w,1)) (~>>(i,eight))
157 end
158
159(*---------------------------------------------------------------------------*)
160(* Maps from numbers to strings                                              *)
161(*---------------------------------------------------------------------------*)
162
163fun ii2s LSB w i = String.implode (map byte2char (bytes_of w i))
164  | ii2s MSB w i = String.implode (rev (map byte2char (bytes_of w i)))
165
166(*---------------------------------------------------------------------------*)
167(* It can be that the specified width is not enough to properly represent i, *)
168(* so we take the max                                                        *)
169(*---------------------------------------------------------------------------*)
170
171fun iint2string enc d w i =
172 let val width = Int.max(w,width_of enc i)
173 in ii2s d width
174      (case enc
175        of Unsigned  => i
176         | Twos_comp => twos_comp width i
177         | Zigzag    => zigzag i
178         | Sign_mag  => sign_mag i)
179 end
180
181val int2string = iint2string Twos_comp LSB 1 o IntInf.fromInt;
182
183(*---------------------------------------------------------------------------*)
184(* Maps from strings to numbers                                              *)
185(*---------------------------------------------------------------------------*)
186
187val unsigned_value_of = l2n o map Word8.toInt;
188
189fun twos_comp_value_of (list : word8 list) =
190 let open IntInf
191 in case list
192     of [] => IntInf.fromInt 0
193      | [e] => Word8.toLargeIntX e
194      | h::t => byte2ii h + 256 * twos_comp_value_of t
195 end;
196
197fun string2iint enc dir =
198 let fun vfn Unsigned  = unsigned_value_of
199       | vfn Twos_comp = twos_comp_value_of
200       | vfn Zigzag    = zigzag o unsigned_value_of
201       | vfn Sign_mag  = sign_mag o unsigned_value_of
202 in
203  case dir
204   of LSB => vfn enc o List.map char2byte o String.explode
205    | MSB => vfn enc o rev o List.map char2byte o String.explode
206 end;
207
208val string2int = IntInf.toInt o string2iint Twos_comp LSB;
209
210(*===========================================================================*)
211(* Intervals                                                                 *)
212(*===========================================================================*)
213
214fun num2regexp n = Chset(charset_span n n);
215
216val zero_regexp = num2regexp 0;
217
218(*---------------------------------------------------------------------------*)
219(* Regexp for the interval {n | 0 <= lo <= n <= hi} in binary encoding. The  *)
220(* dir argument specifies the order in which bytes of the number are laid    *)
221(* out, and the width argument specifies the *minimum* width, in bytes, that *)
222(* the number is expected to fill.                                           *)
223(*---------------------------------------------------------------------------*)
224
225fun num_interval dir width (lo,hi) =
226 let val _ = if lo < 0 orelse hi < lo then
227               raise ERR "num_interval" "malformed interval"
228             else ()
229     val _ = if width < interval_width Unsigned (lo,hi) then
230               raise ERR "num_interval" "given width not large enough"
231             else ()
232     val lorep = rev(padR (n2l lo) 0 width)
233     val hirep = rev(padR (n2l hi) 0 width)
234     fun finalize LoL =
235         case dir
236          of LSB => rev (map catlist LoL)
237           | MSB => rev (map (catlist o rev) LoL)
238     fun mk_ivls [] acc = mk_or (finalize acc)
239       | mk_ivls ((prefix,[],[])::t) acc = raise ERR "num_interval" "empty lists"
240       | mk_ivls ((prefix,[r1],[r2])::t) acc =
241                mk_ivls t ((Chset(charset_span r1 r2)::prefix)::acc)
242       | mk_ivls ((prefix,q1::r1,q2::r2)::t) acc =
243          if q1=q2 then
244             mk_ivls ((num2regexp q1::prefix,r1,r2)::t) acc
245          else (* have q1 < q2 *)
246          if Lib.all (equal 0) r1 andalso Lib.all (equal 255) r2 then
247             let val thing = dots (length r1) @ (Chset(charset_span q1 q2) :: prefix)
248             in mk_ivls t (thing::acc)
249             end
250          else
251          if q1=0 then  (* fill up to e2 slot *)
252             let val w = 1 + length r1
253                 val ceil = padR [1] 0 w
254                 val preceil = padR [255] 255 (w-1)
255                 val thing1 = (num2regexp 0::prefix,r1,preceil)
256                 val thing2 = (prefix,ceil,q2::r2)
257(*       following needs more thought to be better than current version
258         val thing1 = (prefix,q1::r1,padR [q2-1] 255 w)
259         val thing2 = (prefix,padR [q2] 0 w,q2::r2)
260*)
261             in
262                mk_ivls (thing1::thing2::t) acc
263             end
264          else
265          let val w = 1 + length r1
266          in case (Lib.all (equal 0) r1,Lib.all (equal 255) r2)
267              of (true,true)  => raise ERR "mk_ivls" "inaccessible"
268               | (true,false) =>
269                   let val thing1 = (prefix,q1::r1,padR [q2-1] 255 w)
270                       val thing2 = (prefix,padR [q2] 0 w,q2::r2)
271                   in mk_ivls (thing1::thing2::t) acc
272                   end
273               | (false,true) =>
274                   let val thing1 = (prefix,q1::r1,padR [q1] 255 w)
275                       val thing2 = (prefix,padR [q1+1] 0 w,q2::r2)
276                   in mk_ivls (thing1::thing2::t) acc
277                   end
278               | (false,false) =>
279                   let val thing1 = (prefix,q1::r1,padR [q1] 255 w)
280                       val thing2 =
281                          if (q1 + 1 < q2)
282                           then [(prefix,padR [q1+1] 0 w,padR [q2-1] 255 w)]
283                           else []
284                       val thing3 = (prefix,padR [q2] 0 w,q2::r2)
285                   in
286                     mk_ivls ([thing1] @ thing2 @ [thing3] @ t) acc
287                   end
288          end
289       | mk_ivls other wise = raise ERR "num_interval" "lists of different length"
290 in
291   mk_ivls [([]:regexp list,lorep,hirep)] []
292 end
293
294
295(*
296fun Ninterval lo hi =
297  num_interval MSB (unsigned_width (IntInf.fromInt hi))
298       (IntInf.fromInt lo)
299       (IntInf.fromInt hi)
300Ninterval 38 23567900;
301Ninterval 67537 81005;
302Ninterval 0 4611686018427387903;  (* Int63.maxInt *)
303*)
304
305(*---------------------------------------------------------------------------*)
306(* Regexp for binary twos complement representations of integers in the      *)
307(* interval lo .. hi.                                                        *)
308(*---------------------------------------------------------------------------*)
309
310fun twos_comp_interval dir width (lo,hi) =
311 if hi < lo then
312     raise ERR "twos_comp_interval" "hi less than lo"
313 else
314 if width < interval_width Twos_comp (lo,hi) then
315   raise ERR "twos_comp_interval" "width is too small"
316 else
317 if 0 <= lo andalso 0 <= hi then
318    num_interval dir width (lo,hi)
319 else
320 let val top = twoE (width * 8)
321 in
322   if lo < 0 andalso hi >= 0 then
323       (if hi + 1 = top + lo  (* contiguous *) then
324           catlist (dots width)
325        else
326           Or [num_interval dir width (top + lo,top-1),
327               num_interval dir width (0,hi)])
328   else
329   if lo < 0 andalso hi < 0 then
330      num_interval dir width (top+lo,top+hi)
331   else raise ERR "twos_comp_interval" "unexpected values for lo and hi"
332 end
333
334(*---------------------------------------------------------------------------*)
335(* Even and odd numbers                                                      *)
336(*---------------------------------------------------------------------------*)
337
338val small_evens = filter (fn x => x mod 2 = 0) (upto 0 255);
339val small_odds  = filter (fn x => x mod 2 = 1) (upto 0 255);
340val sme_charset = Regexp_Type.charset_of (map Char.chr small_evens);
341val smo_charset = Regexp_Type.charset_of (map Char.chr small_odds);
342
343fun EVEN LSB = Cat(Chset sme_charset,Star DOT)
344  | EVEN MSB = Cat(Star DOT,Chset sme_charset);
345
346fun ODD LSB = Cat(Chset smo_charset,Star DOT)
347  | ODD MSB = Cat(Star DOT, Chset smo_charset);
348
349(*---------------------------------------------------------------------------*)
350(* Regexp for "zigzag" encoded integer in the interval [lo,hi]               *)
351(*---------------------------------------------------------------------------*)
352
353fun pos_zigzag dir (lo,hi) =
354 let val w = interval_width Zigzag (lo,hi)
355 in And(EVEN dir, num_interval dir w (2*lo, 2*hi))
356 end
357
358fun neg_zigzag dir (lo,hi) =
359 let val w = interval_width Zigzag (lo,hi)
360 in And(ODD dir, num_interval dir w (2*(abs hi)-1, 2*(abs lo)-1))
361 end
362
363fun zigzag_interval dir width (lo,hi) =
364 if hi < lo then
365     raise ERR "zigzag_interval" "hi less than lo"
366 else
367 if width < interval_width Zigzag (lo,hi) then
368   raise ERR "zigzag_interval" "width is too small"
369 else
370 if 0 <= lo (* all positive *) then
371    pos_zigzag dir (lo, hi) else
372 if hi < 0 (* all negative *) then
373    neg_zigzag dir (lo,hi)
374 else (* lo < 0 and 0 <= hi *)
375  let val lomag = IntInf.abs lo
376  in if lomag < hi then
377        let val span = num_interval dir width (0, zigzag lomag) (* [lo,lomag] *)
378            val upper = pos_zigzag dir (lomag+1,hi)
379        in mk_or [span, upper]
380        end
381     else
382      if hi < lomag then
383        let val span = num_interval dir width (0, zigzag hi) (* [-hi,hi] *)
384            val lower = neg_zigzag dir (lo,~(hi+1))
385        in mk_or [span,lower]
386        end
387     else (* hi = lomag *)
388      num_interval dir width (0, zigzag hi)
389  end;
390
391(*---------------------------------------------------------------------------*)
392(* Regexp for sign-magnitude encoded integer in the interval [lo,hi]         *)
393(*---------------------------------------------------------------------------*)
394
395fun pos_sign_mag dir (lo,hi) =
396 let val w = interval_width Sign_mag (lo,hi)
397     val base = And(EVEN dir, num_interval dir w (2*lo, 2*hi))
398 in if lo = 0 then
399       mk_or[num_interval dir w (1,1), base]
400    else base
401 end
402
403fun neg_sign_mag dir (lo,hi) =
404 let val w = interval_width Sign_mag (lo,hi)
405 in And(ODD dir, num_interval dir w (2*(abs hi)+1, 2*(abs lo)+1))
406 end
407
408fun sign_mag_interval dir width (lo,hi) =
409 if hi < lo then
410     raise ERR "sign_mag_interval" "hi less than lo"
411 else
412 if width < interval_width Sign_mag (lo,hi) then
413   raise ERR "sign_mag_interval" "width is too small"
414 else
415 if 0 <= lo (* all positive *) then
416    pos_sign_mag dir (lo, hi) else
417 if hi < 0 (* all negative *) then
418    neg_sign_mag dir (lo,hi)
419 else (* lo < 0 and 0 <= hi *)
420  let val lomag = IntInf.abs lo
421  in if lomag < hi then
422        let val span = num_interval dir width (0, sign_mag lo) (* [lo,lomag] *)
423            val upper = pos_sign_mag dir (lomag+1,hi)
424        in mk_or [span, upper]
425        end
426     else
427      if hi < lomag then
428        let val span = num_interval dir width (0, sign_mag (~hi)) (* [-hi,hi] *)
429            val lower = neg_sign_mag dir (lo,~(hi+1))
430        in mk_or [span,lower]
431        end
432     else (* hi = lomag *)
433      num_interval dir width (0, sign_mag (~hi))
434  end;
435
436fun interval_regexp Unsigned  = num_interval
437  | interval_regexp Twos_comp = twos_comp_interval
438  | interval_regexp Zigzag    = zigzag_interval
439  | interval_regexp Sign_mag  = sign_mag_interval;
440
441fun test_interval enc dir w (lo,hi) =
442 let val regexp = Regexp_Match.normalize(interval_regexp enc dir w (lo,hi))
443     val dfa = Regexp_Match.matcher regexp
444     val matcher = #matchfn dfa
445     val states = length (#final dfa)
446     val _ = print (Int.toString states^" states\n")
447     fun testFn i = matcher (iint2string enc dir w i)
448     val ivl = bigUpto lo hi
449     val _ = if Lib.all I (map testFn ivl) then
450              print "all elements of interval matched" else
451             raise ERR "test_interval" "number in interval fails match"
452 in
453   (regexp,states,testFn)
454 end;
455
456(*
457val (r,n,testFn) = test_interval Unsigned LSB 2 (12, 1234);
458val (r,n,testFn) = test_interval Twos_comp LSB 2 (12, 1234);
459val (r,n,testFn) = test_interval Zigzag LSB 2 (12, 1234);
460val (r,n,testFn) = test_interval Sign_mag LSB 2 (12, 1234);
461
462val (r,n,testFn) = test_interval Twos_comp LSB 2 (~12, 1234);
463val (r,n,testFn) = test_interval Zigzag LSB 2 (~12, 1234);
464val (r,n,testFn) = test_interval Sign_mag LSB 2 (~12, 1234);
465
466*)
467
468(*---------------------------------------------------------------------------*)
469(* Regexp for numbers >= n of arbitrary size                                 *)
470(*---------------------------------------------------------------------------*)
471
472(*
473fun LEQ enc dir width n =
474 if n < 0 then
475    raise ERR "LEQ" "negative input"
476 else
477    interval_regexp enc dir width (0,n);
478
479fun GTR enc dir width n = And(EVEN dir, Neg (LEQ enc dir width n));
480
481*)
482
483end
484