1(*===========================================================================*) 2(* Regexps for numbers and numeric intervals *) 3(*===========================================================================*) 4 5structure Regexp_Numerics :> Regexp_Numerics = 6struct 7 8open Lib Feedback regexpMisc Regexp_Type Regexp_Match; 9 10val ERR = mk_HOL_ERR "Regexp_Numerics"; 11 12type word8 = Word8.word; 13type bigint = IntInf.int 14 15fun copy x n = List.tabulate (n,K x) handle _ => []; 16fun padL list x width = copy x (width - length list) @ list; 17fun padR list x width = list @ copy x (width - length list); 18 19 20(*---------------------------------------------------------------------------*) 21(* Least- and Most- significant byte *) 22(*---------------------------------------------------------------------------*) 23 24datatype endian = LSB | MSB 25 26fun compare_endian(LSB,MSB) = LESS 27 | compare_endian(MSB,LSB) = GREATER 28 | compare_endian other = EQUAL 29 30fun endian2string MSB = "MSB" 31 | endian2string LSB = "LSB"; 32 33fun string2endian "MSB" = SOME MSB 34 | string2endian "LSB" = SOME LSB 35 | string2endian other = NONE; 36 37datatype encoding = Unsigned | Twos_comp | Zigzag | Sign_mag ; 38 39(*---------------------------------------------------------------------------*) 40(* ZigZag encoding as an int -> posint map *) 41(*---------------------------------------------------------------------------*) 42 43fun zigzag i = if i >= 0 then 2*i else (2 * IntInf.abs i) - 1; 44 45fun undo_zigzag n = 46 case IntInf.divMod(n,IntInf.fromInt 2) 47 of (k,0) => k 48 | (k,_) => ~(k+1); 49 50(*---------------------------------------------------------------------------*) 51(* Sign-magnitude encoding as an int -> posint map *) 52(*---------------------------------------------------------------------------*) 53 54fun sign_mag i = if i >= 0 then 2*i else (2 * IntInf.abs i) + 1; 55 56fun undo_sign_mag n = 57 if n = 1 then 0 58 else 59 case IntInf.divMod(n,IntInf.fromInt 2) 60 of (k,0) => k 61 | (k,_) => ~k; 62 63(*---------------------------------------------------------------------------*) 64(* Minimum bytes needed to express unsigned and signed quantities *) 65(*---------------------------------------------------------------------------*) 66 67fun log256 n = IntInf.log2 n div 8; 68 69fun unsigned_width i = 70 if i = 0 then 1 else 71 if 0 < i then 1 + log256 i 72 else raise ERR "unsigned_width" "negative input"; 73 74fun twos_comp_width i = 75 let val k = unsigned_width (IntInf.abs i) 76 val bound = twoE (k*8 - 1) 77 in 78 if ~bound <= i andalso i < bound then k else k+1 79 end; 80 81fun width_of Unsigned = unsigned_width 82 | width_of Twos_comp = twos_comp_width 83 | width_of Zigzag = unsigned_width o zigzag 84 | width_of Sign_mag = unsigned_width o sign_mag; 85 86fun pair_max f = Int.max o (f##f) 87 88fun interval_width Unsigned = pair_max unsigned_width 89 | interval_width Twos_comp = pair_max twos_comp_width 90 | interval_width Zigzag = pair_max (width_of Zigzag) 91 | interval_width Sign_mag = pair_max (width_of Sign_mag); 92 93(*---------------------------------------------------------------------------*) 94(* Twos complement encoding as an int -> posint map. *) 95(*---------------------------------------------------------------------------*) 96 97fun twos_comp w i = 98 let val top = twoE (8*w) 99 val bound = IntInf.div(top, IntInf.fromInt 2) 100 in if ~bound <= i andalso i < bound then 101 (if i >= 0 then i else top + i) 102 else raise ERR "twos_comp" "width too small" 103 end; 104 105fun undo_twos_comp w n = 106 let val top = twoE (8*w) 107 val bound = IntInf.div(top, IntInf.fromInt 2) 108 in if 0 <= n andalso n < top then 109 (if n < bound then n else ~(top - n)) 110 else raise ERR "undo_twos_comp" "width too small" 111 end; 112 113(*---------------------------------------------------------------------------*) 114(* Basic maps between bytes, chars, and ints *) 115(*---------------------------------------------------------------------------*) 116 117val byte2char = Char.chr o Word8.toInt; 118val char2byte = Word8.fromInt o Char.ord; 119 120fun ii2byte i = Word8.fromInt (IntInf.toInt i); 121fun byte2ii w = IntInf.fromInt(Word8.toInt w); 122 123val eight = Word.fromInt 8 124 125(*---------------------------------------------------------------------------*) 126(* Maps to and from byte lists *) 127(*---------------------------------------------------------------------------*) 128 129val n2l = 130 let open IntInf 131 fun chunks n = 132 if n=0 then [] 133 else toInt(op mod(n,256)) :: chunks (op div(n,256)) 134 in 135 fn i:IntInf.int => 136 if i < 0 then 137 raise ERR "n2l" "negative input" 138 else 139 chunks i 140 end; 141 142fun l2n [] = IntInf.fromInt 0 143 | l2n (h::t) = IntInf.fromInt h + 256 * l2n t; 144 145(*---------------------------------------------------------------------------*) 146(* bytes_of w i lays out i into w bytes in LSB *) 147(* *) 148(* w should be large enough to capture i *) 149(*---------------------------------------------------------------------------*) 150 151fun bytes_of w i = 152 let open IntInf 153 in if Int.<= (w,1) then 154 [ii2byte i] 155 else 156 ii2byte (andb(i,0xFF)) :: bytes_of (Int.-(w,1)) (~>>(i,eight)) 157 end 158 159(*---------------------------------------------------------------------------*) 160(* Maps from numbers to strings *) 161(*---------------------------------------------------------------------------*) 162 163fun ii2s LSB w i = String.implode (map byte2char (bytes_of w i)) 164 | ii2s MSB w i = String.implode (rev (map byte2char (bytes_of w i))) 165 166(*---------------------------------------------------------------------------*) 167(* It can be that the specified width is not enough to properly represent i, *) 168(* so we take the max *) 169(*---------------------------------------------------------------------------*) 170 171fun iint2string enc d w i = 172 let val width = Int.max(w,width_of enc i) 173 in ii2s d width 174 (case enc 175 of Unsigned => i 176 | Twos_comp => twos_comp width i 177 | Zigzag => zigzag i 178 | Sign_mag => sign_mag i) 179 end 180 181val int2string = iint2string Twos_comp LSB 1 o IntInf.fromInt; 182 183(*---------------------------------------------------------------------------*) 184(* Maps from strings to numbers *) 185(*---------------------------------------------------------------------------*) 186 187val unsigned_value_of = l2n o map Word8.toInt; 188 189fun twos_comp_value_of (list : word8 list) = 190 let open IntInf 191 in case list 192 of [] => IntInf.fromInt 0 193 | [e] => Word8.toLargeIntX e 194 | h::t => byte2ii h + 256 * twos_comp_value_of t 195 end; 196 197fun string2iint enc dir = 198 let fun vfn Unsigned = unsigned_value_of 199 | vfn Twos_comp = twos_comp_value_of 200 | vfn Zigzag = zigzag o unsigned_value_of 201 | vfn Sign_mag = sign_mag o unsigned_value_of 202 in 203 case dir 204 of LSB => vfn enc o List.map char2byte o String.explode 205 | MSB => vfn enc o rev o List.map char2byte o String.explode 206 end; 207 208val string2int = IntInf.toInt o string2iint Twos_comp LSB; 209 210(*===========================================================================*) 211(* Intervals *) 212(*===========================================================================*) 213 214fun num2regexp n = Chset(charset_span n n); 215 216val zero_regexp = num2regexp 0; 217 218(*---------------------------------------------------------------------------*) 219(* Regexp for the interval {n | 0 <= lo <= n <= hi} in binary encoding. The *) 220(* dir argument specifies the order in which bytes of the number are laid *) 221(* out, and the width argument specifies the *minimum* width, in bytes, that *) 222(* the number is expected to fill. *) 223(*---------------------------------------------------------------------------*) 224 225fun num_interval dir width (lo,hi) = 226 let val _ = if lo < 0 orelse hi < lo then 227 raise ERR "num_interval" "malformed interval" 228 else () 229 val _ = if width < interval_width Unsigned (lo,hi) then 230 raise ERR "num_interval" "given width not large enough" 231 else () 232 val lorep = rev(padR (n2l lo) 0 width) 233 val hirep = rev(padR (n2l hi) 0 width) 234 fun finalize LoL = 235 case dir 236 of LSB => rev (map catlist LoL) 237 | MSB => rev (map (catlist o rev) LoL) 238 fun mk_ivls [] acc = mk_or (finalize acc) 239 | mk_ivls ((prefix,[],[])::t) acc = raise ERR "num_interval" "empty lists" 240 | mk_ivls ((prefix,[r1],[r2])::t) acc = 241 mk_ivls t ((Chset(charset_span r1 r2)::prefix)::acc) 242 | mk_ivls ((prefix,q1::r1,q2::r2)::t) acc = 243 if q1=q2 then 244 mk_ivls ((num2regexp q1::prefix,r1,r2)::t) acc 245 else (* have q1 < q2 *) 246 if Lib.all (equal 0) r1 andalso Lib.all (equal 255) r2 then 247 let val thing = dots (length r1) @ (Chset(charset_span q1 q2) :: prefix) 248 in mk_ivls t (thing::acc) 249 end 250 else 251 if q1=0 then (* fill up to e2 slot *) 252 let val w = 1 + length r1 253 val ceil = padR [1] 0 w 254 val preceil = padR [255] 255 (w-1) 255 val thing1 = (num2regexp 0::prefix,r1,preceil) 256 val thing2 = (prefix,ceil,q2::r2) 257(* following needs more thought to be better than current version 258 val thing1 = (prefix,q1::r1,padR [q2-1] 255 w) 259 val thing2 = (prefix,padR [q2] 0 w,q2::r2) 260*) 261 in 262 mk_ivls (thing1::thing2::t) acc 263 end 264 else 265 let val w = 1 + length r1 266 in case (Lib.all (equal 0) r1,Lib.all (equal 255) r2) 267 of (true,true) => raise ERR "mk_ivls" "inaccessible" 268 | (true,false) => 269 let val thing1 = (prefix,q1::r1,padR [q2-1] 255 w) 270 val thing2 = (prefix,padR [q2] 0 w,q2::r2) 271 in mk_ivls (thing1::thing2::t) acc 272 end 273 | (false,true) => 274 let val thing1 = (prefix,q1::r1,padR [q1] 255 w) 275 val thing2 = (prefix,padR [q1+1] 0 w,q2::r2) 276 in mk_ivls (thing1::thing2::t) acc 277 end 278 | (false,false) => 279 let val thing1 = (prefix,q1::r1,padR [q1] 255 w) 280 val thing2 = 281 if (q1 + 1 < q2) 282 then [(prefix,padR [q1+1] 0 w,padR [q2-1] 255 w)] 283 else [] 284 val thing3 = (prefix,padR [q2] 0 w,q2::r2) 285 in 286 mk_ivls ([thing1] @ thing2 @ [thing3] @ t) acc 287 end 288 end 289 | mk_ivls other wise = raise ERR "num_interval" "lists of different length" 290 in 291 mk_ivls [([]:regexp list,lorep,hirep)] [] 292 end 293 294 295(* 296fun Ninterval lo hi = 297 num_interval MSB (unsigned_width (IntInf.fromInt hi)) 298 (IntInf.fromInt lo) 299 (IntInf.fromInt hi) 300Ninterval 38 23567900; 301Ninterval 67537 81005; 302Ninterval 0 4611686018427387903; (* Int63.maxInt *) 303*) 304 305(*---------------------------------------------------------------------------*) 306(* Regexp for binary twos complement representations of integers in the *) 307(* interval lo .. hi. *) 308(*---------------------------------------------------------------------------*) 309 310fun twos_comp_interval dir width (lo,hi) = 311 if hi < lo then 312 raise ERR "twos_comp_interval" "hi less than lo" 313 else 314 if width < interval_width Twos_comp (lo,hi) then 315 raise ERR "twos_comp_interval" "width is too small" 316 else 317 if 0 <= lo andalso 0 <= hi then 318 num_interval dir width (lo,hi) 319 else 320 let val top = twoE (width * 8) 321 in 322 if lo < 0 andalso hi >= 0 then 323 (if hi + 1 = top + lo (* contiguous *) then 324 catlist (dots width) 325 else 326 Or [num_interval dir width (top + lo,top-1), 327 num_interval dir width (0,hi)]) 328 else 329 if lo < 0 andalso hi < 0 then 330 num_interval dir width (top+lo,top+hi) 331 else raise ERR "twos_comp_interval" "unexpected values for lo and hi" 332 end 333 334(*---------------------------------------------------------------------------*) 335(* Even and odd numbers *) 336(*---------------------------------------------------------------------------*) 337 338val small_evens = filter (fn x => x mod 2 = 0) (upto 0 255); 339val small_odds = filter (fn x => x mod 2 = 1) (upto 0 255); 340val sme_charset = Regexp_Type.charset_of (map Char.chr small_evens); 341val smo_charset = Regexp_Type.charset_of (map Char.chr small_odds); 342 343fun EVEN LSB = Cat(Chset sme_charset,Star DOT) 344 | EVEN MSB = Cat(Star DOT,Chset sme_charset); 345 346fun ODD LSB = Cat(Chset smo_charset,Star DOT) 347 | ODD MSB = Cat(Star DOT, Chset smo_charset); 348 349(*---------------------------------------------------------------------------*) 350(* Regexp for "zigzag" encoded integer in the interval [lo,hi] *) 351(*---------------------------------------------------------------------------*) 352 353fun pos_zigzag dir (lo,hi) = 354 let val w = interval_width Zigzag (lo,hi) 355 in And(EVEN dir, num_interval dir w (2*lo, 2*hi)) 356 end 357 358fun neg_zigzag dir (lo,hi) = 359 let val w = interval_width Zigzag (lo,hi) 360 in And(ODD dir, num_interval dir w (2*(abs hi)-1, 2*(abs lo)-1)) 361 end 362 363fun zigzag_interval dir width (lo,hi) = 364 if hi < lo then 365 raise ERR "zigzag_interval" "hi less than lo" 366 else 367 if width < interval_width Zigzag (lo,hi) then 368 raise ERR "zigzag_interval" "width is too small" 369 else 370 if 0 <= lo (* all positive *) then 371 pos_zigzag dir (lo, hi) else 372 if hi < 0 (* all negative *) then 373 neg_zigzag dir (lo,hi) 374 else (* lo < 0 and 0 <= hi *) 375 let val lomag = IntInf.abs lo 376 in if lomag < hi then 377 let val span = num_interval dir width (0, zigzag lomag) (* [lo,lomag] *) 378 val upper = pos_zigzag dir (lomag+1,hi) 379 in mk_or [span, upper] 380 end 381 else 382 if hi < lomag then 383 let val span = num_interval dir width (0, zigzag hi) (* [-hi,hi] *) 384 val lower = neg_zigzag dir (lo,~(hi+1)) 385 in mk_or [span,lower] 386 end 387 else (* hi = lomag *) 388 num_interval dir width (0, zigzag hi) 389 end; 390 391(*---------------------------------------------------------------------------*) 392(* Regexp for sign-magnitude encoded integer in the interval [lo,hi] *) 393(*---------------------------------------------------------------------------*) 394 395fun pos_sign_mag dir (lo,hi) = 396 let val w = interval_width Sign_mag (lo,hi) 397 val base = And(EVEN dir, num_interval dir w (2*lo, 2*hi)) 398 in if lo = 0 then 399 mk_or[num_interval dir w (1,1), base] 400 else base 401 end 402 403fun neg_sign_mag dir (lo,hi) = 404 let val w = interval_width Sign_mag (lo,hi) 405 in And(ODD dir, num_interval dir w (2*(abs hi)+1, 2*(abs lo)+1)) 406 end 407 408fun sign_mag_interval dir width (lo,hi) = 409 if hi < lo then 410 raise ERR "sign_mag_interval" "hi less than lo" 411 else 412 if width < interval_width Sign_mag (lo,hi) then 413 raise ERR "sign_mag_interval" "width is too small" 414 else 415 if 0 <= lo (* all positive *) then 416 pos_sign_mag dir (lo, hi) else 417 if hi < 0 (* all negative *) then 418 neg_sign_mag dir (lo,hi) 419 else (* lo < 0 and 0 <= hi *) 420 let val lomag = IntInf.abs lo 421 in if lomag < hi then 422 let val span = num_interval dir width (0, sign_mag lo) (* [lo,lomag] *) 423 val upper = pos_sign_mag dir (lomag+1,hi) 424 in mk_or [span, upper] 425 end 426 else 427 if hi < lomag then 428 let val span = num_interval dir width (0, sign_mag (~hi)) (* [-hi,hi] *) 429 val lower = neg_sign_mag dir (lo,~(hi+1)) 430 in mk_or [span,lower] 431 end 432 else (* hi = lomag *) 433 num_interval dir width (0, sign_mag (~hi)) 434 end; 435 436fun interval_regexp Unsigned = num_interval 437 | interval_regexp Twos_comp = twos_comp_interval 438 | interval_regexp Zigzag = zigzag_interval 439 | interval_regexp Sign_mag = sign_mag_interval; 440 441fun test_interval enc dir w (lo,hi) = 442 let val regexp = Regexp_Match.normalize(interval_regexp enc dir w (lo,hi)) 443 val dfa = Regexp_Match.matcher regexp 444 val matcher = #matchfn dfa 445 val states = length (#final dfa) 446 val _ = print (Int.toString states^" states\n") 447 fun testFn i = matcher (iint2string enc dir w i) 448 val ivl = bigUpto lo hi 449 val _ = if Lib.all I (map testFn ivl) then 450 print "all elements of interval matched" else 451 raise ERR "test_interval" "number in interval fails match" 452 in 453 (regexp,states,testFn) 454 end; 455 456(* 457val (r,n,testFn) = test_interval Unsigned LSB 2 (12, 1234); 458val (r,n,testFn) = test_interval Twos_comp LSB 2 (12, 1234); 459val (r,n,testFn) = test_interval Zigzag LSB 2 (12, 1234); 460val (r,n,testFn) = test_interval Sign_mag LSB 2 (12, 1234); 461 462val (r,n,testFn) = test_interval Twos_comp LSB 2 (~12, 1234); 463val (r,n,testFn) = test_interval Zigzag LSB 2 (~12, 1234); 464val (r,n,testFn) = test_interval Sign_mag LSB 2 (~12, 1234); 465 466*) 467 468(*---------------------------------------------------------------------------*) 469(* Regexp for numbers >= n of arbitrary size *) 470(*---------------------------------------------------------------------------*) 471 472(* 473fun LEQ enc dir width n = 474 if n < 0 then 475 raise ERR "LEQ" "negative input" 476 else 477 interval_regexp enc dir width (0,n); 478 479fun GTR enc dir width n = And(EVEN dir, Neg (LEQ enc dir width n)); 480 481*) 482 483end 484