1/********************************************************************** 2 3 pack.c - 4 5 $Author: eregon $ 6 created at: Thu Feb 10 15:17:05 JST 1994 7 8 Copyright (C) 1993-2007 Yukihiro Matsumoto 9 10**********************************************************************/ 11 12#include "ruby/ruby.h" 13#include "ruby/encoding.h" 14#include <sys/types.h> 15#include <ctype.h> 16#include <errno.h> 17 18#define GCC_VERSION_SINCE(major, minor, patchlevel) \ 19 (defined(__GNUC__) && !defined(__INTEL_COMPILER) && \ 20 ((__GNUC__ > (major)) || \ 21 (__GNUC__ == (major) && __GNUC_MINOR__ > (minor)) || \ 22 (__GNUC__ == (major) && __GNUC_MINOR__ == (minor) && __GNUC_PATCHLEVEL__ >= (patchlevel)))) 23#if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 24# define NATINT_PACK 25#endif 26 27#ifdef DYNAMIC_ENDIAN 28 /* for universal binary of NEXTSTEP and MacOS X */ 29 /* useless since autoconf 2.63? */ 30 static int 31 is_bigendian(void) 32 { 33 static int init = 0; 34 static int endian_value; 35 char *p; 36 37 if (init) return endian_value; 38 init = 1; 39 p = (char*)&init; 40 return endian_value = p[0]?0:1; 41 } 42# define BIGENDIAN_P() (is_bigendian()) 43#elif defined(WORDS_BIGENDIAN) 44# define BIGENDIAN_P() 1 45#else 46# define BIGENDIAN_P() 0 47#endif 48 49#ifdef NATINT_PACK 50# define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len)) 51#else 52# define NATINT_LEN(type,len) ((int)sizeof(type)) 53#endif 54 55#if SIZEOF_LONG == 8 56# define INT64toNUM(x) LONG2NUM(x) 57# define UINT64toNUM(x) ULONG2NUM(x) 58#elif defined(HAVE_LONG_LONG) && SIZEOF_LONG_LONG == 8 59# define INT64toNUM(x) LL2NUM(x) 60# define UINT64toNUM(x) ULL2NUM(x) 61#endif 62 63#define define_swapx(x, xtype) \ 64static xtype \ 65TOKEN_PASTE(swap,x)(xtype z) \ 66{ \ 67 xtype r; \ 68 xtype *zp; \ 69 unsigned char *s, *t; \ 70 int i; \ 71 \ 72 zp = xmalloc(sizeof(xtype)); \ 73 *zp = z; \ 74 s = (unsigned char*)zp; \ 75 t = xmalloc(sizeof(xtype)); \ 76 for (i=0; i<sizeof(xtype); i++) { \ 77 t[sizeof(xtype)-i-1] = s[i]; \ 78 } \ 79 r = *(xtype *)t; \ 80 xfree(t); \ 81 xfree(zp); \ 82 return r; \ 83} 84 85#if GCC_VERSION_SINCE(4,3,0) 86# define swap32(x) __builtin_bswap32(x) 87# define swap64(x) __builtin_bswap64(x) 88#endif 89 90#ifndef swap16 91# define swap16(x) ((uint16_t)((((x)&0xFF)<<8) | (((x)>>8)&0xFF))) 92#endif 93 94#ifndef swap32 95# define swap32(x) ((uint32_t)((((x)&0xFF)<<24) \ 96 |(((x)>>24)&0xFF) \ 97 |(((x)&0x0000FF00)<<8) \ 98 |(((x)&0x00FF0000)>>8) )) 99#endif 100 101#ifndef swap64 102# ifdef HAVE_INT64_T 103# define byte_in_64bit(n) ((uint64_t)0xff << (n)) 104# define swap64(x) ((uint64_t)((((x)&byte_in_64bit(0))<<56) \ 105 |(((x)>>56)&0xFF) \ 106 |(((x)&byte_in_64bit(8))<<40) \ 107 |(((x)&byte_in_64bit(48))>>40) \ 108 |(((x)&byte_in_64bit(16))<<24) \ 109 |(((x)&byte_in_64bit(40))>>24) \ 110 |(((x)&byte_in_64bit(24))<<8) \ 111 |(((x)&byte_in_64bit(32))>>8))) 112# endif 113#endif 114 115#if SIZEOF_SHORT == 2 116# define swaps(x) swap16(x) 117#elif SIZEOF_SHORT == 4 118# define swaps(x) swap32(x) 119#else 120 define_swapx(s,short) 121#endif 122 123#if SIZEOF_INT == 2 124# define swapi(x) swap16(x) 125#elif SIZEOF_INT == 4 126# define swapi(x) swap32(x) 127#else 128 define_swapx(i,int) 129#endif 130 131#if SIZEOF_LONG == 4 132# define swapl(x) swap32(x) 133#elif SIZEOF_LONG == 8 134# define swapl(x) swap64(x) 135#else 136 define_swapx(l,long) 137#endif 138 139#ifdef HAVE_LONG_LONG 140# if SIZEOF_LONG_LONG == 8 141# define swapll(x) swap64(x) 142# else 143 define_swapx(ll,LONG_LONG) 144# endif 145#endif 146 147#if SIZEOF_FLOAT == 4 && defined(HAVE_INT32_T) 148# define swapf(x) swap32(x) 149# define FLOAT_SWAPPER uint32_t 150#else 151 define_swapx(f,float) 152#endif 153 154#if SIZEOF_DOUBLE == 8 && defined(HAVE_INT64_T) 155# define swapd(x) swap64(x) 156# define DOUBLE_SWAPPER uint64_t 157#elif SIZEOF_DOUBLE == 8 && defined(HAVE_INT32_T) 158 static double 159 swapd(const double d) 160 { 161 double dtmp = d; 162 uint32_t utmp[2]; 163 uint32_t utmp0; 164 165 utmp[0] = 0; utmp[1] = 0; 166 memcpy(utmp,&dtmp,sizeof(double)); 167 utmp0 = utmp[0]; 168 utmp[0] = swap32(utmp[1]); 169 utmp[1] = swap32(utmp0); 170 memcpy(&dtmp,utmp,sizeof(double)); 171 return dtmp; 172 } 173#else 174 define_swapx(d, double) 175#endif 176 177#undef define_swapx 178 179#define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x)) 180#define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x)) 181#define rb_htonf(x) (BIGENDIAN_P()?(x):swapf(x)) 182#define rb_htond(x) (BIGENDIAN_P()?(x):swapd(x)) 183#define rb_htovf(x) (BIGENDIAN_P()?swapf(x):(x)) 184#define rb_htovd(x) (BIGENDIAN_P()?swapd(x):(x)) 185#define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x)) 186#define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x)) 187 188#ifdef FLOAT_SWAPPER 189# define FLOAT_CONVWITH(y) FLOAT_SWAPPER y; 190# define HTONF(x,y) (memcpy(&(y),&(x),sizeof(float)), \ 191 (y) = rb_htonf((FLOAT_SWAPPER)(y)), \ 192 memcpy(&(x),&(y),sizeof(float)), \ 193 (x)) 194# define HTOVF(x,y) (memcpy(&(y),&(x),sizeof(float)), \ 195 (y) = rb_htovf((FLOAT_SWAPPER)(y)), \ 196 memcpy(&(x),&(y),sizeof(float)), \ 197 (x)) 198# define NTOHF(x,y) (memcpy(&(y),&(x),sizeof(float)), \ 199 (y) = rb_ntohf((FLOAT_SWAPPER)(y)), \ 200 memcpy(&(x),&(y),sizeof(float)), \ 201 (x)) 202# define VTOHF(x,y) (memcpy(&(y),&(x),sizeof(float)), \ 203 (y) = rb_vtohf((FLOAT_SWAPPER)(y)), \ 204 memcpy(&(x),&(y),sizeof(float)), \ 205 (x)) 206#else 207# define FLOAT_CONVWITH(y) 208# define HTONF(x,y) rb_htonf(x) 209# define HTOVF(x,y) rb_htovf(x) 210# define NTOHF(x,y) rb_ntohf(x) 211# define VTOHF(x,y) rb_vtohf(x) 212#endif 213 214#ifdef DOUBLE_SWAPPER 215# define DOUBLE_CONVWITH(y) DOUBLE_SWAPPER y; 216# define HTOND(x,y) (memcpy(&(y),&(x),sizeof(double)), \ 217 (y) = rb_htond((DOUBLE_SWAPPER)(y)), \ 218 memcpy(&(x),&(y),sizeof(double)), \ 219 (x)) 220# define HTOVD(x,y) (memcpy(&(y),&(x),sizeof(double)), \ 221 (y) = rb_htovd((DOUBLE_SWAPPER)(y)), \ 222 memcpy(&(x),&(y),sizeof(double)), \ 223 (x)) 224# define NTOHD(x,y) (memcpy(&(y),&(x),sizeof(double)), \ 225 (y) = rb_ntohd((DOUBLE_SWAPPER)(y)), \ 226 memcpy(&(x),&(y),sizeof(double)), \ 227 (x)) 228# define VTOHD(x,y) (memcpy(&(y),&(x),sizeof(double)), \ 229 (y) = rb_vtohd((DOUBLE_SWAPPER)(y)), \ 230 memcpy(&(x),&(y),sizeof(double)), \ 231 (x)) 232#else 233# define DOUBLE_CONVWITH(y) 234# define HTOND(x,y) rb_htond(x) 235# define HTOVD(x,y) rb_htovd(x) 236# define NTOHD(x,y) rb_ntohd(x) 237# define VTOHD(x,y) rb_vtohd(x) 238#endif 239 240static unsigned long 241num2i32(VALUE x) 242{ 243 x = rb_to_int(x); /* is nil OK? (should not) */ 244 245 if (FIXNUM_P(x)) return FIX2LONG(x); 246 if (RB_TYPE_P(x, T_BIGNUM)) { 247 return rb_big2ulong_pack(x); 248 } 249 rb_raise(rb_eTypeError, "can't convert %s to `integer'", rb_obj_classname(x)); 250 251 UNREACHABLE; 252} 253 254#define MAX_INTEGER_PACK_SIZE 8 255/* #define FORCE_BIG_PACK */ 256 257static const char toofew[] = "too few arguments"; 258 259static void encodes(VALUE,const char*,long,int,int); 260static void qpencode(VALUE,VALUE,long); 261 262static unsigned long utf8_to_uv(const char*,long*); 263 264/* 265 * call-seq: 266 * arr.pack ( aTemplateString ) -> aBinaryString 267 * 268 * Packs the contents of <i>arr</i> into a binary sequence according to 269 * the directives in <i>aTemplateString</i> (see the table below) 270 * Directives ``A,'' ``a,'' and ``Z'' may be followed by a count, 271 * which gives the width of the resulting field. The remaining 272 * directives also may take a count, indicating the number of array 273 * elements to convert. If the count is an asterisk 274 * (``<code>*</code>''), all remaining array elements will be 275 * converted. Any of the directives ``<code>sSiIlL</code>'' may be 276 * followed by an underscore (``<code>_</code>'') or 277 * exclamation mark (``<code>!</code>'') to use the underlying 278 * platform's native size for the specified type; otherwise, they use a 279 * platform-independent size. Spaces are ignored in the template 280 * string. See also <code>String#unpack</code>. 281 * 282 * a = [ "a", "b", "c" ] 283 * n = [ 65, 66, 67 ] 284 * a.pack("A3A3A3") #=> "a b c " 285 * a.pack("a3a3a3") #=> "a\000\000b\000\000c\000\000" 286 * n.pack("ccc") #=> "ABC" 287 * 288 * Directives for +pack+. 289 * 290 * Integer | Array | 291 * Directive | Element | Meaning 292 * --------------------------------------------------------------------------- 293 * C | Integer | 8-bit unsigned (unsigned char) 294 * S | Integer | 16-bit unsigned, native endian (uint16_t) 295 * L | Integer | 32-bit unsigned, native endian (uint32_t) 296 * Q | Integer | 64-bit unsigned, native endian (uint64_t) 297 * | | 298 * c | Integer | 8-bit signed (signed char) 299 * s | Integer | 16-bit signed, native endian (int16_t) 300 * l | Integer | 32-bit signed, native endian (int32_t) 301 * q | Integer | 64-bit signed, native endian (int64_t) 302 * | | 303 * S_, S! | Integer | unsigned short, native endian 304 * I, I_, I! | Integer | unsigned int, native endian 305 * L_, L! | Integer | unsigned long, native endian 306 * | | 307 * s_, s! | Integer | signed short, native endian 308 * i, i_, i! | Integer | signed int, native endian 309 * l_, l! | Integer | signed long, native endian 310 * | | 311 * S> L> Q> | Integer | same as the directives without ">" except 312 * s> l> q> | | big endian 313 * S!> I!> | | (available since Ruby 1.9.3) 314 * L!> | | "S>" is same as "n" 315 * s!> i!> | | "L>" is same as "N" 316 * l!> | | 317 * | | 318 * S< L< Q< | Integer | same as the directives without "<" except 319 * s< l< q< | | little endian 320 * S!< I!< | | (available since Ruby 1.9.3) 321 * L!< | | "S<" is same as "v" 322 * s!< i!< | | "L<" is same as "V" 323 * l!< | | 324 * | | 325 * n | Integer | 16-bit unsigned, network (big-endian) byte order 326 * N | Integer | 32-bit unsigned, network (big-endian) byte order 327 * v | Integer | 16-bit unsigned, VAX (little-endian) byte order 328 * V | Integer | 32-bit unsigned, VAX (little-endian) byte order 329 * | | 330 * U | Integer | UTF-8 character 331 * w | Integer | BER-compressed integer 332 * 333 * Float | | 334 * Directive | | Meaning 335 * --------------------------------------------------------------------------- 336 * D, d | Float | double-precision, native format 337 * F, f | Float | single-precision, native format 338 * E | Float | double-precision, little-endian byte order 339 * e | Float | single-precision, little-endian byte order 340 * G | Float | double-precision, network (big-endian) byte order 341 * g | Float | single-precision, network (big-endian) byte order 342 * 343 * String | | 344 * Directive | | Meaning 345 * --------------------------------------------------------------------------- 346 * A | String | arbitrary binary string (space padded, count is width) 347 * a | String | arbitrary binary string (null padded, count is width) 348 * Z | String | same as ``a'', except that null is added with * 349 * B | String | bit string (MSB first) 350 * b | String | bit string (LSB first) 351 * H | String | hex string (high nibble first) 352 * h | String | hex string (low nibble first) 353 * u | String | UU-encoded string 354 * M | String | quoted printable, MIME encoding (see RFC2045) 355 * m | String | base64 encoded string (see RFC 2045, count is width) 356 * | | (if count is 0, no line feed are added, see RFC 4648) 357 * P | String | pointer to a structure (fixed-length string) 358 * p | String | pointer to a null-terminated string 359 * 360 * Misc. | | 361 * Directive | | Meaning 362 * --------------------------------------------------------------------------- 363 * @ | --- | moves to absolute position 364 * X | --- | back up a byte 365 * x | --- | null byte 366 */ 367 368static VALUE 369pack_pack(VALUE ary, VALUE fmt) 370{ 371 static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0"; 372 static const char spc10[] = " "; 373 const char *p, *pend; 374 VALUE res, from, associates = 0; 375 char type; 376 long items, len, idx, plen; 377 const char *ptr; 378 int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */ 379#ifdef NATINT_PACK 380 int natint; /* native integer */ 381#endif 382 int integer_size, bigendian_p; 383 384 StringValue(fmt); 385 p = RSTRING_PTR(fmt); 386 pend = p + RSTRING_LEN(fmt); 387 res = rb_str_buf_new(0); 388 389 items = RARRAY_LEN(ary); 390 idx = 0; 391 392#define TOO_FEW (rb_raise(rb_eArgError, toofew), 0) 393#define THISFROM (items > 0 ? RARRAY_PTR(ary)[idx] : TOO_FEW) 394#define NEXTFROM (items-- > 0 ? RARRAY_PTR(ary)[idx++] : TOO_FEW) 395 396 while (p < pend) { 397 int explicit_endian = 0; 398 if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) { 399 rb_raise(rb_eRuntimeError, "format string modified"); 400 } 401 type = *p++; /* get data type */ 402#ifdef NATINT_PACK 403 natint = 0; 404#endif 405 406 if (ISSPACE(type)) continue; 407 if (type == '#') { 408 while ((p < pend) && (*p != '\n')) { 409 p++; 410 } 411 continue; 412 } 413 414 { 415 static const char natstr[] = "sSiIlL"; 416 static const char endstr[] = "sSiIlLqQ"; 417 418 modifiers: 419 switch (*p) { 420 case '_': 421 case '!': 422 if (strchr(natstr, type)) { 423#ifdef NATINT_PACK 424 natint = 1; 425#endif 426 p++; 427 } 428 else { 429 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); 430 } 431 goto modifiers; 432 433 case '<': 434 case '>': 435 if (!strchr(endstr, type)) { 436 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr); 437 } 438 if (explicit_endian) { 439 rb_raise(rb_eRangeError, "Can't use both '<' and '>'"); 440 } 441 explicit_endian = *p++; 442 goto modifiers; 443 } 444 } 445 446 if (*p == '*') { /* set data length */ 447 len = strchr("@Xxu", type) ? 0 448 : strchr("PMm", type) ? 1 449 : items; 450 p++; 451 } 452 else if (ISDIGIT(*p)) { 453 errno = 0; 454 len = STRTOUL(p, (char**)&p, 10); 455 if (errno) { 456 rb_raise(rb_eRangeError, "pack length too big"); 457 } 458 } 459 else { 460 len = 1; 461 } 462 463 switch (type) { 464 case 'U': 465 /* if encoding is US-ASCII, upgrade to UTF-8 */ 466 if (enc_info == 1) enc_info = 2; 467 break; 468 case 'm': case 'M': case 'u': 469 /* keep US-ASCII (do nothing) */ 470 break; 471 default: 472 /* fall back to BINARY */ 473 enc_info = 0; 474 break; 475 } 476 switch (type) { 477 case 'A': case 'a': case 'Z': 478 case 'B': case 'b': 479 case 'H': case 'h': 480 from = NEXTFROM; 481 if (NIL_P(from)) { 482 ptr = ""; 483 plen = 0; 484 } 485 else { 486 StringValue(from); 487 ptr = RSTRING_PTR(from); 488 plen = RSTRING_LEN(from); 489 OBJ_INFECT(res, from); 490 } 491 492 if (p[-1] == '*') 493 len = plen; 494 495 switch (type) { 496 case 'a': /* arbitrary binary string (null padded) */ 497 case 'A': /* arbitrary binary string (ASCII space padded) */ 498 case 'Z': /* null terminated string */ 499 if (plen >= len) { 500 rb_str_buf_cat(res, ptr, len); 501 if (p[-1] == '*' && type == 'Z') 502 rb_str_buf_cat(res, nul10, 1); 503 } 504 else { 505 rb_str_buf_cat(res, ptr, plen); 506 len -= plen; 507 while (len >= 10) { 508 rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10); 509 len -= 10; 510 } 511 rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len); 512 } 513 break; 514 515#define castchar(from) (char)((from) & 0xff) 516 517 case 'b': /* bit string (ascending) */ 518 { 519 int byte = 0; 520 long i, j = 0; 521 522 if (len > plen) { 523 j = (len - plen + 1)/2; 524 len = plen; 525 } 526 for (i=0; i++ < len; ptr++) { 527 if (*ptr & 1) 528 byte |= 128; 529 if (i & 7) 530 byte >>= 1; 531 else { 532 char c = castchar(byte); 533 rb_str_buf_cat(res, &c, 1); 534 byte = 0; 535 } 536 } 537 if (len & 7) { 538 char c; 539 byte >>= 7 - (len & 7); 540 c = castchar(byte); 541 rb_str_buf_cat(res, &c, 1); 542 } 543 len = j; 544 goto grow; 545 } 546 break; 547 548 case 'B': /* bit string (descending) */ 549 { 550 int byte = 0; 551 long i, j = 0; 552 553 if (len > plen) { 554 j = (len - plen + 1)/2; 555 len = plen; 556 } 557 for (i=0; i++ < len; ptr++) { 558 byte |= *ptr & 1; 559 if (i & 7) 560 byte <<= 1; 561 else { 562 char c = castchar(byte); 563 rb_str_buf_cat(res, &c, 1); 564 byte = 0; 565 } 566 } 567 if (len & 7) { 568 char c; 569 byte <<= 7 - (len & 7); 570 c = castchar(byte); 571 rb_str_buf_cat(res, &c, 1); 572 } 573 len = j; 574 goto grow; 575 } 576 break; 577 578 case 'h': /* hex string (low nibble first) */ 579 { 580 int byte = 0; 581 long i, j = 0; 582 583 if (len > plen) { 584 j = (len + 1) / 2 - (plen + 1) / 2; 585 len = plen; 586 } 587 for (i=0; i++ < len; ptr++) { 588 if (ISALPHA(*ptr)) 589 byte |= (((*ptr & 15) + 9) & 15) << 4; 590 else 591 byte |= (*ptr & 15) << 4; 592 if (i & 1) 593 byte >>= 4; 594 else { 595 char c = castchar(byte); 596 rb_str_buf_cat(res, &c, 1); 597 byte = 0; 598 } 599 } 600 if (len & 1) { 601 char c = castchar(byte); 602 rb_str_buf_cat(res, &c, 1); 603 } 604 len = j; 605 goto grow; 606 } 607 break; 608 609 case 'H': /* hex string (high nibble first) */ 610 { 611 int byte = 0; 612 long i, j = 0; 613 614 if (len > plen) { 615 j = (len + 1) / 2 - (plen + 1) / 2; 616 len = plen; 617 } 618 for (i=0; i++ < len; ptr++) { 619 if (ISALPHA(*ptr)) 620 byte |= ((*ptr & 15) + 9) & 15; 621 else 622 byte |= *ptr & 15; 623 if (i & 1) 624 byte <<= 4; 625 else { 626 char c = castchar(byte); 627 rb_str_buf_cat(res, &c, 1); 628 byte = 0; 629 } 630 } 631 if (len & 1) { 632 char c = castchar(byte); 633 rb_str_buf_cat(res, &c, 1); 634 } 635 len = j; 636 goto grow; 637 } 638 break; 639 } 640 break; 641 642 case 'c': /* signed char */ 643 case 'C': /* unsigned char */ 644 while (len-- > 0) { 645 char c; 646 647 from = NEXTFROM; 648 c = (char)num2i32(from); 649 rb_str_buf_cat(res, &c, sizeof(char)); 650 } 651 break; 652 653 case 's': /* signed short */ 654 integer_size = NATINT_LEN(short, 2); 655 bigendian_p = BIGENDIAN_P(); 656 goto pack_integer; 657 658 case 'S': /* unsigned short */ 659 integer_size = NATINT_LEN(short, 2); 660 bigendian_p = BIGENDIAN_P(); 661 goto pack_integer; 662 663 case 'i': /* signed int */ 664 integer_size = (int)sizeof(int); 665 bigendian_p = BIGENDIAN_P(); 666 goto pack_integer; 667 668 case 'I': /* unsigned int */ 669 integer_size = (int)sizeof(int); 670 bigendian_p = BIGENDIAN_P(); 671 goto pack_integer; 672 673 case 'l': /* signed long */ 674 integer_size = NATINT_LEN(long, 4); 675 bigendian_p = BIGENDIAN_P(); 676 goto pack_integer; 677 678 case 'L': /* unsigned long */ 679 integer_size = NATINT_LEN(long, 4); 680 bigendian_p = BIGENDIAN_P(); 681 goto pack_integer; 682 683 case 'q': /* signed quad (64bit) int */ 684 integer_size = 8; 685 bigendian_p = BIGENDIAN_P(); 686 goto pack_integer; 687 688 case 'Q': /* unsigned quad (64bit) int */ 689 integer_size = 8; 690 bigendian_p = BIGENDIAN_P(); 691 goto pack_integer; 692 693 case 'n': /* unsigned short (network byte-order) */ 694 integer_size = 2; 695 bigendian_p = 1; 696 goto pack_integer; 697 698 case 'N': /* unsigned long (network byte-order) */ 699 integer_size = 4; 700 bigendian_p = 1; 701 goto pack_integer; 702 703 case 'v': /* unsigned short (VAX byte-order) */ 704 integer_size = 2; 705 bigendian_p = 0; 706 goto pack_integer; 707 708 case 'V': /* unsigned long (VAX byte-order) */ 709 integer_size = 4; 710 bigendian_p = 0; 711 goto pack_integer; 712 713 pack_integer: 714 if (explicit_endian) { 715 bigendian_p = explicit_endian == '>'; 716 } 717 718 switch (integer_size) { 719#if defined(HAVE_INT16_T) && !defined(FORCE_BIG_PACK) 720 case SIZEOF_INT16_T: 721 while (len-- > 0) { 722 union { 723 int16_t i; 724 char a[sizeof(int16_t)]; 725 } v; 726 727 from = NEXTFROM; 728 v.i = (int16_t)num2i32(from); 729 if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i); 730 rb_str_buf_cat(res, v.a, sizeof(int16_t)); 731 } 732 break; 733#endif 734 735#if defined(HAVE_INT32_T) && !defined(FORCE_BIG_PACK) 736 case SIZEOF_INT32_T: 737 while (len-- > 0) { 738 union { 739 int32_t i; 740 char a[sizeof(int32_t)]; 741 } v; 742 743 from = NEXTFROM; 744 v.i = (int32_t)num2i32(from); 745 if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i); 746 rb_str_buf_cat(res, v.a, sizeof(int32_t)); 747 } 748 break; 749#endif 750 751#if defined(HAVE_INT64_T) && SIZEOF_LONG == SIZEOF_INT64_T && !defined(FORCE_BIG_PACK) 752 case SIZEOF_INT64_T: 753 while (len-- > 0) { 754 union { 755 int64_t i; 756 char a[sizeof(int64_t)]; 757 } v; 758 759 from = NEXTFROM; 760 v.i = num2i32(from); /* can return 64bit value if SIZEOF_LONG == SIZEOF_INT64_T */ 761 if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i); 762 rb_str_buf_cat(res, v.a, sizeof(int64_t)); 763 } 764 break; 765#endif 766 767 default: 768 if (integer_size > MAX_INTEGER_PACK_SIZE) 769 rb_bug("unexpected intger size for pack: %d", integer_size); 770 while (len-- > 0) { 771 union { 772 unsigned long i[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG-1)/SIZEOF_LONG]; 773 char a[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG-1)/SIZEOF_LONG*SIZEOF_LONG]; 774 } v; 775 int num_longs = (integer_size+SIZEOF_LONG-1)/SIZEOF_LONG; 776 int i; 777 778 from = NEXTFROM; 779 rb_big_pack(from, v.i, num_longs); 780 if (bigendian_p) { 781 for (i = 0; i < num_longs/2; i++) { 782 unsigned long t = v.i[i]; 783 v.i[i] = v.i[num_longs-1-i]; 784 v.i[num_longs-1-i] = t; 785 } 786 } 787 if (bigendian_p != BIGENDIAN_P()) { 788 for (i = 0; i < num_longs; i++) 789 v.i[i] = swapl(v.i[i]); 790 } 791 rb_str_buf_cat(res, 792 bigendian_p ? 793 v.a + sizeof(long)*num_longs - integer_size : 794 v.a, 795 integer_size); 796 } 797 break; 798 } 799 break; 800 801 case 'f': /* single precision float in native format */ 802 case 'F': /* ditto */ 803 while (len-- > 0) { 804 float f; 805 806 from = NEXTFROM; 807 f = (float)RFLOAT_VALUE(rb_to_float(from)); 808 rb_str_buf_cat(res, (char*)&f, sizeof(float)); 809 } 810 break; 811 812 case 'e': /* single precision float in VAX byte-order */ 813 while (len-- > 0) { 814 float f; 815 FLOAT_CONVWITH(ftmp); 816 817 from = NEXTFROM; 818 f = (float)RFLOAT_VALUE(rb_to_float(from)); 819 f = HTOVF(f,ftmp); 820 rb_str_buf_cat(res, (char*)&f, sizeof(float)); 821 } 822 break; 823 824 case 'E': /* double precision float in VAX byte-order */ 825 while (len-- > 0) { 826 double d; 827 DOUBLE_CONVWITH(dtmp); 828 829 from = NEXTFROM; 830 d = RFLOAT_VALUE(rb_to_float(from)); 831 d = HTOVD(d,dtmp); 832 rb_str_buf_cat(res, (char*)&d, sizeof(double)); 833 } 834 break; 835 836 case 'd': /* double precision float in native format */ 837 case 'D': /* ditto */ 838 while (len-- > 0) { 839 double d; 840 841 from = NEXTFROM; 842 d = RFLOAT_VALUE(rb_to_float(from)); 843 rb_str_buf_cat(res, (char*)&d, sizeof(double)); 844 } 845 break; 846 847 case 'g': /* single precision float in network byte-order */ 848 while (len-- > 0) { 849 float f; 850 FLOAT_CONVWITH(ftmp); 851 852 from = NEXTFROM; 853 f = (float)RFLOAT_VALUE(rb_to_float(from)); 854 f = HTONF(f,ftmp); 855 rb_str_buf_cat(res, (char*)&f, sizeof(float)); 856 } 857 break; 858 859 case 'G': /* double precision float in network byte-order */ 860 while (len-- > 0) { 861 double d; 862 DOUBLE_CONVWITH(dtmp); 863 864 from = NEXTFROM; 865 d = RFLOAT_VALUE(rb_to_float(from)); 866 d = HTOND(d,dtmp); 867 rb_str_buf_cat(res, (char*)&d, sizeof(double)); 868 } 869 break; 870 871 case 'x': /* null byte */ 872 grow: 873 while (len >= 10) { 874 rb_str_buf_cat(res, nul10, 10); 875 len -= 10; 876 } 877 rb_str_buf_cat(res, nul10, len); 878 break; 879 880 case 'X': /* back up byte */ 881 shrink: 882 plen = RSTRING_LEN(res); 883 if (plen < len) 884 rb_raise(rb_eArgError, "X outside of string"); 885 rb_str_set_len(res, plen - len); 886 break; 887 888 case '@': /* null fill to absolute position */ 889 len -= RSTRING_LEN(res); 890 if (len > 0) goto grow; 891 len = -len; 892 if (len > 0) goto shrink; 893 break; 894 895 case '%': 896 rb_raise(rb_eArgError, "%% is not supported"); 897 break; 898 899 case 'U': /* Unicode character */ 900 while (len-- > 0) { 901 SIGNED_VALUE l; 902 char buf[8]; 903 int le; 904 905 from = NEXTFROM; 906 from = rb_to_int(from); 907 l = NUM2LONG(from); 908 if (l < 0) { 909 rb_raise(rb_eRangeError, "pack(U): value out of range"); 910 } 911 le = rb_uv_to_utf8(buf, l); 912 rb_str_buf_cat(res, (char*)buf, le); 913 } 914 break; 915 916 case 'u': /* uuencoded string */ 917 case 'm': /* base64 encoded string */ 918 from = NEXTFROM; 919 StringValue(from); 920 ptr = RSTRING_PTR(from); 921 plen = RSTRING_LEN(from); 922 923 if (len == 0 && type == 'm') { 924 encodes(res, ptr, plen, type, 0); 925 ptr += plen; 926 break; 927 } 928 if (len <= 2) 929 len = 45; 930 else if (len > 63 && type == 'u') 931 len = 63; 932 else 933 len = len / 3 * 3; 934 while (plen > 0) { 935 long todo; 936 937 if (plen > len) 938 todo = len; 939 else 940 todo = plen; 941 encodes(res, ptr, todo, type, 1); 942 plen -= todo; 943 ptr += todo; 944 } 945 break; 946 947 case 'M': /* quoted-printable encoded string */ 948 from = rb_obj_as_string(NEXTFROM); 949 if (len <= 1) 950 len = 72; 951 qpencode(res, from, len); 952 break; 953 954 case 'P': /* pointer to packed byte string */ 955 from = THISFROM; 956 if (!NIL_P(from)) { 957 StringValue(from); 958 if (RSTRING_LEN(from) < len) { 959 rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)", 960 RSTRING_LEN(from), len); 961 } 962 } 963 len = 1; 964 /* FALL THROUGH */ 965 case 'p': /* pointer to string */ 966 while (len-- > 0) { 967 char *t; 968 from = NEXTFROM; 969 if (NIL_P(from)) { 970 t = 0; 971 } 972 else { 973 t = StringValuePtr(from); 974 } 975 if (!associates) { 976 associates = rb_ary_new(); 977 } 978 rb_ary_push(associates, from); 979 rb_obj_taint(from); 980 rb_str_buf_cat(res, (char*)&t, sizeof(char*)); 981 } 982 break; 983 984 case 'w': /* BER compressed integer */ 985 while (len-- > 0) { 986 unsigned long ul; 987 VALUE buf = rb_str_new(0, 0); 988 char c, *bufs, *bufe; 989 990 from = NEXTFROM; 991 if (RB_TYPE_P(from, T_BIGNUM)) { 992 VALUE big128 = rb_uint2big(128); 993 while (RB_TYPE_P(from, T_BIGNUM)) { 994 from = rb_big_divmod(from, big128); 995 c = castchar(NUM2INT(RARRAY_PTR(from)[1]) | 0x80); /* mod */ 996 rb_str_buf_cat(buf, &c, sizeof(char)); 997 from = RARRAY_PTR(from)[0]; /* div */ 998 } 999 } 1000 1001 { 1002 long l = NUM2LONG(from); 1003 if (l < 0) { 1004 rb_raise(rb_eArgError, "can't compress negative numbers"); 1005 } 1006 ul = l; 1007 } 1008 1009 while (ul) { 1010 c = castchar((ul & 0x7f) | 0x80); 1011 rb_str_buf_cat(buf, &c, sizeof(char)); 1012 ul >>= 7; 1013 } 1014 1015 if (RSTRING_LEN(buf)) { 1016 bufs = RSTRING_PTR(buf); 1017 bufe = bufs + RSTRING_LEN(buf) - 1; 1018 *bufs &= 0x7f; /* clear continue bit */ 1019 while (bufs < bufe) { /* reverse */ 1020 c = *bufs; 1021 *bufs++ = *bufe; 1022 *bufe-- = c; 1023 } 1024 rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf)); 1025 } 1026 else { 1027 c = 0; 1028 rb_str_buf_cat(res, &c, sizeof(char)); 1029 } 1030 } 1031 break; 1032 1033 default: 1034 rb_warning("unknown pack directive '%c' in '%s'", 1035 type, RSTRING_PTR(fmt)); 1036 break; 1037 } 1038 } 1039 1040 if (associates) { 1041 rb_str_associate(res, associates); 1042 } 1043 OBJ_INFECT(res, fmt); 1044 switch (enc_info) { 1045 case 1: 1046 ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT); 1047 break; 1048 case 2: 1049 rb_enc_set_index(res, rb_utf8_encindex()); 1050 break; 1051 default: 1052 /* do nothing, keep ASCII-8BIT */ 1053 break; 1054 } 1055 return res; 1056} 1057 1058static const char uu_table[] = 1059"`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_"; 1060static const char b64_table[] = 1061"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 1062 1063static void 1064encodes(VALUE str, const char *s, long len, int type, int tail_lf) 1065{ 1066 char buff[4096]; 1067 long i = 0; 1068 const char *trans = type == 'u' ? uu_table : b64_table; 1069 char padding; 1070 1071 if (type == 'u') { 1072 buff[i++] = (char)len + ' '; 1073 padding = '`'; 1074 } 1075 else { 1076 padding = '='; 1077 } 1078 while (len >= 3) { 1079 while (len >= 3 && sizeof(buff)-i >= 4) { 1080 buff[i++] = trans[077 & (*s >> 2)]; 1081 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))]; 1082 buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))]; 1083 buff[i++] = trans[077 & s[2]]; 1084 s += 3; 1085 len -= 3; 1086 } 1087 if (sizeof(buff)-i < 4) { 1088 rb_str_buf_cat(str, buff, i); 1089 i = 0; 1090 } 1091 } 1092 1093 if (len == 2) { 1094 buff[i++] = trans[077 & (*s >> 2)]; 1095 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))]; 1096 buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))]; 1097 buff[i++] = padding; 1098 } 1099 else if (len == 1) { 1100 buff[i++] = trans[077 & (*s >> 2)]; 1101 buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))]; 1102 buff[i++] = padding; 1103 buff[i++] = padding; 1104 } 1105 if (tail_lf) buff[i++] = '\n'; 1106 rb_str_buf_cat(str, buff, i); 1107} 1108 1109static const char hex_table[] = "0123456789ABCDEF"; 1110 1111static void 1112qpencode(VALUE str, VALUE from, long len) 1113{ 1114 char buff[1024]; 1115 long i = 0, n = 0, prev = EOF; 1116 unsigned char *s = (unsigned char*)RSTRING_PTR(from); 1117 unsigned char *send = s + RSTRING_LEN(from); 1118 1119 while (s < send) { 1120 if ((*s > 126) || 1121 (*s < 32 && *s != '\n' && *s != '\t') || 1122 (*s == '=')) { 1123 buff[i++] = '='; 1124 buff[i++] = hex_table[*s >> 4]; 1125 buff[i++] = hex_table[*s & 0x0f]; 1126 n += 3; 1127 prev = EOF; 1128 } 1129 else if (*s == '\n') { 1130 if (prev == ' ' || prev == '\t') { 1131 buff[i++] = '='; 1132 buff[i++] = *s; 1133 } 1134 buff[i++] = *s; 1135 n = 0; 1136 prev = *s; 1137 } 1138 else { 1139 buff[i++] = *s; 1140 n++; 1141 prev = *s; 1142 } 1143 if (n > len) { 1144 buff[i++] = '='; 1145 buff[i++] = '\n'; 1146 n = 0; 1147 prev = '\n'; 1148 } 1149 if (i > 1024 - 5) { 1150 rb_str_buf_cat(str, buff, i); 1151 i = 0; 1152 } 1153 s++; 1154 } 1155 if (n > 0) { 1156 buff[i++] = '='; 1157 buff[i++] = '\n'; 1158 } 1159 if (i > 0) { 1160 rb_str_buf_cat(str, buff, i); 1161 } 1162} 1163 1164static inline int 1165hex2num(char c) 1166{ 1167 switch (c) { 1168 case '0': case '1': case '2': case '3': case '4': 1169 case '5': case '6': case '7': case '8': case '9': 1170 return c - '0'; 1171 case 'a': case 'b': case 'c': 1172 case 'd': case 'e': case 'f': 1173 return c - 'a' + 10; 1174 case 'A': case 'B': case 'C': 1175 case 'D': case 'E': case 'F': 1176 return c - 'A' + 10; 1177 default: 1178 return -1; 1179 } 1180} 1181 1182#define PACK_LENGTH_ADJUST_SIZE(sz) do { \ 1183 tmp_len = 0; \ 1184 if (len > (long)((send-s)/(sz))) { \ 1185 if (!star) { \ 1186 tmp_len = len-(send-s)/(sz); \ 1187 } \ 1188 len = (send-s)/(sz); \ 1189 } \ 1190} while (0) 1191 1192#define PACK_ITEM_ADJUST() do { \ 1193 if (tmp_len > 0 && !block_p) \ 1194 rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \ 1195} while (0) 1196 1197static VALUE 1198infected_str_new(const char *ptr, long len, VALUE str) 1199{ 1200 VALUE s = rb_str_new(ptr, len); 1201 1202 OBJ_INFECT(s, str); 1203 return s; 1204} 1205 1206/* 1207 * call-seq: 1208 * str.unpack(format) -> anArray 1209 * 1210 * Decodes <i>str</i> (which may contain binary data) according to the 1211 * format string, returning an array of each value extracted. The 1212 * format string consists of a sequence of single-character directives, 1213 * summarized in the table at the end of this entry. 1214 * Each directive may be followed 1215 * by a number, indicating the number of times to repeat with this 1216 * directive. An asterisk (``<code>*</code>'') will use up all 1217 * remaining elements. The directives <code>sSiIlL</code> may each be 1218 * followed by an underscore (``<code>_</code>'') or 1219 * exclamation mark (``<code>!</code>'') to use the underlying 1220 * platform's native size for the specified type; otherwise, it uses a 1221 * platform-independent consistent size. Spaces are ignored in the 1222 * format string. See also <code>Array#pack</code>. 1223 * 1224 * "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "] 1225 * "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"] 1226 * "abc \0abc \0".unpack('Z*Z*') #=> ["abc ", "abc "] 1227 * "aa".unpack('b8B8') #=> ["10000110", "01100001"] 1228 * "aaa".unpack('h2H2c') #=> ["16", "61", 97] 1229 * "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534] 1230 * "now=20is".unpack('M*') #=> ["now is"] 1231 * "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"] 1232 * 1233 * This table summarizes the various formats and the Ruby classes 1234 * returned by each. 1235 * 1236 * Integer | | 1237 * Directive | Returns | Meaning 1238 * ----------------------------------------------------------------- 1239 * C | Integer | 8-bit unsigned (unsigned char) 1240 * S | Integer | 16-bit unsigned, native endian (uint16_t) 1241 * L | Integer | 32-bit unsigned, native endian (uint32_t) 1242 * Q | Integer | 64-bit unsigned, native endian (uint64_t) 1243 * | | 1244 * c | Integer | 8-bit signed (signed char) 1245 * s | Integer | 16-bit signed, native endian (int16_t) 1246 * l | Integer | 32-bit signed, native endian (int32_t) 1247 * q | Integer | 64-bit signed, native endian (int64_t) 1248 * | | 1249 * S_, S! | Integer | unsigned short, native endian 1250 * I, I_, I! | Integer | unsigned int, native endian 1251 * L_, L! | Integer | unsigned long, native endian 1252 * | | 1253 * s_, s! | Integer | signed short, native endian 1254 * i, i_, i! | Integer | signed int, native endian 1255 * l_, l! | Integer | signed long, native endian 1256 * | | 1257 * S> L> Q> | Integer | same as the directives without ">" except 1258 * s> l> q> | | big endian 1259 * S!> I!> | | (available since Ruby 1.9.3) 1260 * L!> Q!> | | "S>" is same as "n" 1261 * s!> i!> | | "L>" is same as "N" 1262 * l!> q!> | | 1263 * | | 1264 * S< L< Q< | Integer | same as the directives without "<" except 1265 * s< l< q< | | little endian 1266 * S!< I!< | | (available since Ruby 1.9.3) 1267 * L!< Q!< | | "S<" is same as "v" 1268 * s!< i!< | | "L<" is same as "V" 1269 * l!< q!< | | 1270 * | | 1271 * n | Integer | 16-bit unsigned, network (big-endian) byte order 1272 * N | Integer | 32-bit unsigned, network (big-endian) byte order 1273 * v | Integer | 16-bit unsigned, VAX (little-endian) byte order 1274 * V | Integer | 32-bit unsigned, VAX (little-endian) byte order 1275 * | | 1276 * U | Integer | UTF-8 character 1277 * w | Integer | BER-compressed integer (see Array.pack) 1278 * 1279 * Float | | 1280 * Directive | Returns | Meaning 1281 * ----------------------------------------------------------------- 1282 * D, d | Float | double-precision, native format 1283 * F, f | Float | single-precision, native format 1284 * E | Float | double-precision, little-endian byte order 1285 * e | Float | single-precision, little-endian byte order 1286 * G | Float | double-precision, network (big-endian) byte order 1287 * g | Float | single-precision, network (big-endian) byte order 1288 * 1289 * String | | 1290 * Directive | Returns | Meaning 1291 * ----------------------------------------------------------------- 1292 * A | String | arbitrary binary string (remove trailing nulls and ASCII spaces) 1293 * a | String | arbitrary binary string 1294 * Z | String | null-terminated string 1295 * B | String | bit string (MSB first) 1296 * b | String | bit string (LSB first) 1297 * H | String | hex string (high nibble first) 1298 * h | String | hex string (low nibble first) 1299 * u | String | UU-encoded string 1300 * M | String | quoted-printable, MIME encoding (see RFC2045) 1301 * m | String | base64 encoded string (RFC 2045) (default) 1302 * | | base64 encoded string (RFC 4648) if followed by 0 1303 * P | String | pointer to a structure (fixed-length string) 1304 * p | String | pointer to a null-terminated string 1305 * 1306 * Misc. | | 1307 * Directive | Returns | Meaning 1308 * ----------------------------------------------------------------- 1309 * @ | --- | skip to the offset given by the length argument 1310 * X | --- | skip backward one byte 1311 * x | --- | skip forward one byte 1312 */ 1313 1314static VALUE 1315pack_unpack(VALUE str, VALUE fmt) 1316{ 1317 static const char hexdigits[] = "0123456789abcdef"; 1318 char *s, *send; 1319 char *p, *pend; 1320 VALUE ary; 1321 char type; 1322 long len, tmp_len; 1323 int star; 1324#ifdef NATINT_PACK 1325 int natint; /* native integer */ 1326#endif 1327 int block_p = rb_block_given_p(); 1328 int signed_p, integer_size, bigendian_p; 1329#define UNPACK_PUSH(item) do {\ 1330 VALUE item_val = (item);\ 1331 if (block_p) {\ 1332 rb_yield(item_val);\ 1333 }\ 1334 else {\ 1335 rb_ary_push(ary, item_val);\ 1336 }\ 1337 } while (0) 1338 1339 StringValue(str); 1340 StringValue(fmt); 1341 s = RSTRING_PTR(str); 1342 send = s + RSTRING_LEN(str); 1343 p = RSTRING_PTR(fmt); 1344 pend = p + RSTRING_LEN(fmt); 1345 1346 ary = block_p ? Qnil : rb_ary_new(); 1347 while (p < pend) { 1348 int explicit_endian = 0; 1349 type = *p++; 1350#ifdef NATINT_PACK 1351 natint = 0; 1352#endif 1353 1354 if (ISSPACE(type)) continue; 1355 if (type == '#') { 1356 while ((p < pend) && (*p != '\n')) { 1357 p++; 1358 } 1359 continue; 1360 } 1361 1362 star = 0; 1363 { 1364 static const char natstr[] = "sSiIlL"; 1365 static const char endstr[] = "sSiIlLqQ"; 1366 1367 modifiers: 1368 switch (*p) { 1369 case '_': 1370 case '!': 1371 1372 if (strchr(natstr, type)) { 1373#ifdef NATINT_PACK 1374 natint = 1; 1375#endif 1376 p++; 1377 } 1378 else { 1379 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); 1380 } 1381 goto modifiers; 1382 1383 case '<': 1384 case '>': 1385 if (!strchr(endstr, type)) { 1386 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr); 1387 } 1388 if (explicit_endian) { 1389 rb_raise(rb_eRangeError, "Can't use both '<' and '>'"); 1390 } 1391 explicit_endian = *p++; 1392 goto modifiers; 1393 } 1394 } 1395 1396 if (p >= pend) 1397 len = 1; 1398 else if (*p == '*') { 1399 star = 1; 1400 len = send - s; 1401 p++; 1402 } 1403 else if (ISDIGIT(*p)) { 1404 errno = 0; 1405 len = STRTOUL(p, (char**)&p, 10); 1406 if (errno) { 1407 rb_raise(rb_eRangeError, "pack length too big"); 1408 } 1409 } 1410 else { 1411 len = (type != '@'); 1412 } 1413 1414 switch (type) { 1415 case '%': 1416 rb_raise(rb_eArgError, "%% is not supported"); 1417 break; 1418 1419 case 'A': 1420 if (len > send - s) len = send - s; 1421 { 1422 long end = len; 1423 char *t = s + len - 1; 1424 1425 while (t >= s) { 1426 if (*t != ' ' && *t != '\0') break; 1427 t--; len--; 1428 } 1429 UNPACK_PUSH(infected_str_new(s, len, str)); 1430 s += end; 1431 } 1432 break; 1433 1434 case 'Z': 1435 { 1436 char *t = s; 1437 1438 if (len > send-s) len = send-s; 1439 while (t < s+len && *t) t++; 1440 UNPACK_PUSH(infected_str_new(s, t-s, str)); 1441 if (t < send) t++; 1442 s = star ? t : s+len; 1443 } 1444 break; 1445 1446 case 'a': 1447 if (len > send - s) len = send - s; 1448 UNPACK_PUSH(infected_str_new(s, len, str)); 1449 s += len; 1450 break; 1451 1452 case 'b': 1453 { 1454 VALUE bitstr; 1455 char *t; 1456 int bits; 1457 long i; 1458 1459 if (p[-1] == '*' || len > (send - s) * 8) 1460 len = (send - s) * 8; 1461 bits = 0; 1462 UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len)); 1463 t = RSTRING_PTR(bitstr); 1464 for (i=0; i<len; i++) { 1465 if (i & 7) bits >>= 1; 1466 else bits = *s++; 1467 *t++ = (bits & 1) ? '1' : '0'; 1468 } 1469 } 1470 break; 1471 1472 case 'B': 1473 { 1474 VALUE bitstr; 1475 char *t; 1476 int bits; 1477 long i; 1478 1479 if (p[-1] == '*' || len > (send - s) * 8) 1480 len = (send - s) * 8; 1481 bits = 0; 1482 UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len)); 1483 t = RSTRING_PTR(bitstr); 1484 for (i=0; i<len; i++) { 1485 if (i & 7) bits <<= 1; 1486 else bits = *s++; 1487 *t++ = (bits & 128) ? '1' : '0'; 1488 } 1489 } 1490 break; 1491 1492 case 'h': 1493 { 1494 VALUE bitstr; 1495 char *t; 1496 int bits; 1497 long i; 1498 1499 if (p[-1] == '*' || len > (send - s) * 2) 1500 len = (send - s) * 2; 1501 bits = 0; 1502 UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len)); 1503 t = RSTRING_PTR(bitstr); 1504 for (i=0; i<len; i++) { 1505 if (i & 1) 1506 bits >>= 4; 1507 else 1508 bits = *s++; 1509 *t++ = hexdigits[bits & 15]; 1510 } 1511 } 1512 break; 1513 1514 case 'H': 1515 { 1516 VALUE bitstr; 1517 char *t; 1518 int bits; 1519 long i; 1520 1521 if (p[-1] == '*' || len > (send - s) * 2) 1522 len = (send - s) * 2; 1523 bits = 0; 1524 UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len)); 1525 t = RSTRING_PTR(bitstr); 1526 for (i=0; i<len; i++) { 1527 if (i & 1) 1528 bits <<= 4; 1529 else 1530 bits = *s++; 1531 *t++ = hexdigits[(bits >> 4) & 15]; 1532 } 1533 } 1534 break; 1535 1536 case 'c': 1537 PACK_LENGTH_ADJUST_SIZE(sizeof(char)); 1538 while (len-- > 0) { 1539 int c = *s++; 1540 if (c > (char)127) c-=256; 1541 UNPACK_PUSH(INT2FIX(c)); 1542 } 1543 PACK_ITEM_ADJUST(); 1544 break; 1545 1546 case 'C': 1547 PACK_LENGTH_ADJUST_SIZE(sizeof(unsigned char)); 1548 while (len-- > 0) { 1549 unsigned char c = *s++; 1550 UNPACK_PUSH(INT2FIX(c)); 1551 } 1552 PACK_ITEM_ADJUST(); 1553 break; 1554 1555 case 's': 1556 signed_p = 1; 1557 integer_size = NATINT_LEN(short, 2); 1558 bigendian_p = BIGENDIAN_P(); 1559 goto unpack_integer; 1560 1561 case 'S': 1562 signed_p = 0; 1563 integer_size = NATINT_LEN(short, 2); 1564 bigendian_p = BIGENDIAN_P(); 1565 goto unpack_integer; 1566 1567 case 'i': 1568 signed_p = 1; 1569 integer_size = (int)sizeof(int); 1570 bigendian_p = BIGENDIAN_P(); 1571 goto unpack_integer; 1572 1573 case 'I': 1574 signed_p = 0; 1575 integer_size = (int)sizeof(int); 1576 bigendian_p = BIGENDIAN_P(); 1577 goto unpack_integer; 1578 1579 case 'l': 1580 signed_p = 1; 1581 integer_size = NATINT_LEN(long, 4); 1582 bigendian_p = BIGENDIAN_P(); 1583 goto unpack_integer; 1584 1585 case 'L': 1586 signed_p = 0; 1587 integer_size = NATINT_LEN(long, 4); 1588 bigendian_p = BIGENDIAN_P(); 1589 goto unpack_integer; 1590 1591 case 'q': 1592 signed_p = 1; 1593 integer_size = 8; 1594 bigendian_p = BIGENDIAN_P(); 1595 goto unpack_integer; 1596 1597 case 'Q': 1598 signed_p = 0; 1599 integer_size = 8; 1600 bigendian_p = BIGENDIAN_P(); 1601 goto unpack_integer; 1602 1603 case 'n': 1604 signed_p = 0; 1605 integer_size = 2; 1606 bigendian_p = 1; 1607 goto unpack_integer; 1608 1609 case 'N': 1610 signed_p = 0; 1611 integer_size = 4; 1612 bigendian_p = 1; 1613 goto unpack_integer; 1614 1615 case 'v': 1616 signed_p = 0; 1617 integer_size = 2; 1618 bigendian_p = 0; 1619 goto unpack_integer; 1620 1621 case 'V': 1622 signed_p = 0; 1623 integer_size = 4; 1624 bigendian_p = 0; 1625 goto unpack_integer; 1626 1627 unpack_integer: 1628 if (explicit_endian) { 1629 bigendian_p = explicit_endian == '>'; 1630 } 1631 1632 switch (integer_size) { 1633#if defined(HAVE_INT16_T) && !defined(FORCE_BIG_PACK) 1634 case SIZEOF_INT16_T: 1635 if (signed_p) { 1636 PACK_LENGTH_ADJUST_SIZE(sizeof(int16_t)); 1637 while (len-- > 0) { 1638 union { 1639 int16_t i; 1640 char a[sizeof(int16_t)]; 1641 } v; 1642 memcpy(v.a, s, sizeof(int16_t)); 1643 if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i); 1644 s += sizeof(int16_t); 1645 UNPACK_PUSH(INT2FIX(v.i)); 1646 } 1647 PACK_ITEM_ADJUST(); 1648 } 1649 else { 1650 PACK_LENGTH_ADJUST_SIZE(sizeof(uint16_t)); 1651 while (len-- > 0) { 1652 union { 1653 uint16_t i; 1654 char a[sizeof(uint16_t)]; 1655 } v; 1656 memcpy(v.a, s, sizeof(uint16_t)); 1657 if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i); 1658 s += sizeof(uint16_t); 1659 UNPACK_PUSH(INT2FIX(v.i)); 1660 } 1661 PACK_ITEM_ADJUST(); 1662 } 1663 break; 1664#endif 1665 1666#if defined(HAVE_INT32_T) && !defined(FORCE_BIG_PACK) 1667 case SIZEOF_INT32_T: 1668 if (signed_p) { 1669 PACK_LENGTH_ADJUST_SIZE(sizeof(int32_t)); 1670 while (len-- > 0) { 1671 union { 1672 int32_t i; 1673 char a[sizeof(int32_t)]; 1674 } v; 1675 memcpy(v.a, s, sizeof(int32_t)); 1676 if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i); 1677 s += sizeof(int32_t); 1678 UNPACK_PUSH(INT2NUM(v.i)); 1679 } 1680 PACK_ITEM_ADJUST(); 1681 } 1682 else { 1683 PACK_LENGTH_ADJUST_SIZE(sizeof(uint32_t)); 1684 while (len-- > 0) { 1685 union { 1686 uint32_t i; 1687 char a[sizeof(uint32_t)]; 1688 } v; 1689 memcpy(v.a, s, sizeof(uint32_t)); 1690 if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i); 1691 s += sizeof(uint32_t); 1692 UNPACK_PUSH(UINT2NUM(v.i)); 1693 } 1694 PACK_ITEM_ADJUST(); 1695 } 1696 break; 1697#endif 1698 1699#if defined(HAVE_INT64_T) && !defined(FORCE_BIG_PACK) 1700 case SIZEOF_INT64_T: 1701 if (signed_p) { 1702 PACK_LENGTH_ADJUST_SIZE(sizeof(int64_t)); 1703 while (len-- > 0) { 1704 union { 1705 int64_t i; 1706 char a[sizeof(int64_t)]; 1707 } v; 1708 memcpy(v.a, s, sizeof(int64_t)); 1709 if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i); 1710 s += sizeof(int64_t); 1711 UNPACK_PUSH(INT64toNUM(v.i)); 1712 } 1713 PACK_ITEM_ADJUST(); 1714 } 1715 else { 1716 PACK_LENGTH_ADJUST_SIZE(sizeof(uint64_t)); 1717 while (len-- > 0) { 1718 union { 1719 uint64_t i; 1720 char a[sizeof(uint64_t)]; 1721 } v; 1722 memcpy(v.a, s, sizeof(uint64_t)); 1723 if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i); 1724 s += sizeof(uint64_t); 1725 UNPACK_PUSH(UINT64toNUM(v.i)); 1726 } 1727 PACK_ITEM_ADJUST(); 1728 } 1729 break; 1730#endif 1731 1732 default: 1733 if (integer_size > MAX_INTEGER_PACK_SIZE) 1734 rb_bug("unexpected integer size for pack: %d", integer_size); 1735 PACK_LENGTH_ADJUST_SIZE(integer_size); 1736 while (len-- > 0) { 1737 union { 1738 unsigned long i[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG)/SIZEOF_LONG]; 1739 char a[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG)/SIZEOF_LONG*SIZEOF_LONG]; 1740 } v; 1741 int num_longs = (integer_size+SIZEOF_LONG)/SIZEOF_LONG; 1742 int i; 1743 1744 if (signed_p && (signed char)s[bigendian_p ? 0 : (integer_size-1)] < 0) 1745 memset(v.a, 0xff, sizeof(long)*num_longs); 1746 else 1747 memset(v.a, 0, sizeof(long)*num_longs); 1748 if (bigendian_p) 1749 memcpy(v.a + sizeof(long)*num_longs - integer_size, s, integer_size); 1750 else 1751 memcpy(v.a, s, integer_size); 1752 if (bigendian_p) { 1753 for (i = 0; i < num_longs/2; i++) { 1754 unsigned long t = v.i[i]; 1755 v.i[i] = v.i[num_longs-1-i]; 1756 v.i[num_longs-1-i] = t; 1757 } 1758 } 1759 if (bigendian_p != BIGENDIAN_P()) { 1760 for (i = 0; i < num_longs; i++) 1761 v.i[i] = swapl(v.i[i]); 1762 } 1763 s += integer_size; 1764 UNPACK_PUSH(rb_big_unpack(v.i, num_longs)); 1765 } 1766 PACK_ITEM_ADJUST(); 1767 break; 1768 } 1769 break; 1770 1771 case 'f': 1772 case 'F': 1773 PACK_LENGTH_ADJUST_SIZE(sizeof(float)); 1774 while (len-- > 0) { 1775 float tmp; 1776 memcpy(&tmp, s, sizeof(float)); 1777 s += sizeof(float); 1778 UNPACK_PUSH(DBL2NUM((double)tmp)); 1779 } 1780 PACK_ITEM_ADJUST(); 1781 break; 1782 1783 case 'e': 1784 PACK_LENGTH_ADJUST_SIZE(sizeof(float)); 1785 while (len-- > 0) { 1786 float tmp; 1787 FLOAT_CONVWITH(ftmp); 1788 1789 memcpy(&tmp, s, sizeof(float)); 1790 s += sizeof(float); 1791 tmp = VTOHF(tmp,ftmp); 1792 UNPACK_PUSH(DBL2NUM((double)tmp)); 1793 } 1794 PACK_ITEM_ADJUST(); 1795 break; 1796 1797 case 'E': 1798 PACK_LENGTH_ADJUST_SIZE(sizeof(double)); 1799 while (len-- > 0) { 1800 double tmp; 1801 DOUBLE_CONVWITH(dtmp); 1802 1803 memcpy(&tmp, s, sizeof(double)); 1804 s += sizeof(double); 1805 tmp = VTOHD(tmp,dtmp); 1806 UNPACK_PUSH(DBL2NUM(tmp)); 1807 } 1808 PACK_ITEM_ADJUST(); 1809 break; 1810 1811 case 'D': 1812 case 'd': 1813 PACK_LENGTH_ADJUST_SIZE(sizeof(double)); 1814 while (len-- > 0) { 1815 double tmp; 1816 memcpy(&tmp, s, sizeof(double)); 1817 s += sizeof(double); 1818 UNPACK_PUSH(DBL2NUM(tmp)); 1819 } 1820 PACK_ITEM_ADJUST(); 1821 break; 1822 1823 case 'g': 1824 PACK_LENGTH_ADJUST_SIZE(sizeof(float)); 1825 while (len-- > 0) { 1826 float tmp; 1827 FLOAT_CONVWITH(ftmp); 1828 1829 memcpy(&tmp, s, sizeof(float)); 1830 s += sizeof(float); 1831 tmp = NTOHF(tmp,ftmp); 1832 UNPACK_PUSH(DBL2NUM((double)tmp)); 1833 } 1834 PACK_ITEM_ADJUST(); 1835 break; 1836 1837 case 'G': 1838 PACK_LENGTH_ADJUST_SIZE(sizeof(double)); 1839 while (len-- > 0) { 1840 double tmp; 1841 DOUBLE_CONVWITH(dtmp); 1842 1843 memcpy(&tmp, s, sizeof(double)); 1844 s += sizeof(double); 1845 tmp = NTOHD(tmp,dtmp); 1846 UNPACK_PUSH(DBL2NUM(tmp)); 1847 } 1848 PACK_ITEM_ADJUST(); 1849 break; 1850 1851 case 'U': 1852 if (len > send - s) len = send - s; 1853 while (len > 0 && s < send) { 1854 long alen = send - s; 1855 unsigned long l; 1856 1857 l = utf8_to_uv(s, &alen); 1858 s += alen; len--; 1859 UNPACK_PUSH(ULONG2NUM(l)); 1860 } 1861 break; 1862 1863 case 'u': 1864 { 1865 VALUE buf = infected_str_new(0, (send - s)*3/4, str); 1866 char *ptr = RSTRING_PTR(buf); 1867 long total = 0; 1868 1869 while (s < send && *s > ' ' && *s < 'a') { 1870 long a,b,c,d; 1871 char hunk[4]; 1872 1873 hunk[3] = '\0'; 1874 len = (*s++ - ' ') & 077; 1875 total += len; 1876 if (total > RSTRING_LEN(buf)) { 1877 len -= total - RSTRING_LEN(buf); 1878 total = RSTRING_LEN(buf); 1879 } 1880 1881 while (len > 0) { 1882 long mlen = len > 3 ? 3 : len; 1883 1884 if (s < send && *s >= ' ') 1885 a = (*s++ - ' ') & 077; 1886 else 1887 a = 0; 1888 if (s < send && *s >= ' ') 1889 b = (*s++ - ' ') & 077; 1890 else 1891 b = 0; 1892 if (s < send && *s >= ' ') 1893 c = (*s++ - ' ') & 077; 1894 else 1895 c = 0; 1896 if (s < send && *s >= ' ') 1897 d = (*s++ - ' ') & 077; 1898 else 1899 d = 0; 1900 hunk[0] = (char)(a << 2 | b >> 4); 1901 hunk[1] = (char)(b << 4 | c >> 2); 1902 hunk[2] = (char)(c << 6 | d); 1903 memcpy(ptr, hunk, mlen); 1904 ptr += mlen; 1905 len -= mlen; 1906 } 1907 if (*s == '\r') s++; 1908 if (*s == '\n') s++; 1909 else if (s < send && (s+1 == send || s[1] == '\n')) 1910 s += 2; /* possible checksum byte */ 1911 } 1912 1913 rb_str_set_len(buf, total); 1914 UNPACK_PUSH(buf); 1915 } 1916 break; 1917 1918 case 'm': 1919 { 1920 VALUE buf = infected_str_new(0, (send - s)*3/4, str); 1921 char *ptr = RSTRING_PTR(buf); 1922 int a = -1,b = -1,c = 0,d = 0; 1923 static signed char b64_xtable[256]; 1924 1925 if (b64_xtable['/'] <= 0) { 1926 int i; 1927 1928 for (i = 0; i < 256; i++) { 1929 b64_xtable[i] = -1; 1930 } 1931 for (i = 0; i < 64; i++) { 1932 b64_xtable[(unsigned char)b64_table[i]] = (char)i; 1933 } 1934 } 1935 if (len == 0) { 1936 while (s < send) { 1937 a = b = c = d = -1; 1938 a = b64_xtable[(unsigned char)*s++]; 1939 if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64"); 1940 b = b64_xtable[(unsigned char)*s++]; 1941 if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64"); 1942 if (*s == '=') { 1943 if (s + 2 == send && *(s + 1) == '=') break; 1944 rb_raise(rb_eArgError, "invalid base64"); 1945 } 1946 c = b64_xtable[(unsigned char)*s++]; 1947 if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64"); 1948 if (s + 1 == send && *s == '=') break; 1949 d = b64_xtable[(unsigned char)*s++]; 1950 if (d == -1) rb_raise(rb_eArgError, "invalid base64"); 1951 *ptr++ = castchar(a << 2 | b >> 4); 1952 *ptr++ = castchar(b << 4 | c >> 2); 1953 *ptr++ = castchar(c << 6 | d); 1954 } 1955 if (c == -1) { 1956 *ptr++ = castchar(a << 2 | b >> 4); 1957 if (b & 0xf) rb_raise(rb_eArgError, "invalid base64"); 1958 } 1959 else if (d == -1) { 1960 *ptr++ = castchar(a << 2 | b >> 4); 1961 *ptr++ = castchar(b << 4 | c >> 2); 1962 if (c & 0x3) rb_raise(rb_eArgError, "invalid base64"); 1963 } 1964 } 1965 else { 1966 while (s < send) { 1967 a = b = c = d = -1; 1968 while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;} 1969 if (s >= send) break; 1970 s++; 1971 while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;} 1972 if (s >= send) break; 1973 s++; 1974 while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;} 1975 if (*s == '=' || s >= send) break; 1976 s++; 1977 while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;} 1978 if (*s == '=' || s >= send) break; 1979 s++; 1980 *ptr++ = castchar(a << 2 | b >> 4); 1981 *ptr++ = castchar(b << 4 | c >> 2); 1982 *ptr++ = castchar(c << 6 | d); 1983 } 1984 if (a != -1 && b != -1) { 1985 if (c == -1 && *s == '=') 1986 *ptr++ = castchar(a << 2 | b >> 4); 1987 else if (c != -1 && *s == '=') { 1988 *ptr++ = castchar(a << 2 | b >> 4); 1989 *ptr++ = castchar(b << 4 | c >> 2); 1990 } 1991 } 1992 } 1993 rb_str_set_len(buf, ptr - RSTRING_PTR(buf)); 1994 UNPACK_PUSH(buf); 1995 } 1996 break; 1997 1998 case 'M': 1999 { 2000 VALUE buf = infected_str_new(0, send - s, str); 2001 char *ptr = RSTRING_PTR(buf), *ss = s; 2002 int c1, c2; 2003 2004 while (s < send) { 2005 if (*s == '=') { 2006 if (++s == send) break; 2007 if (s+1 < send && *s == '\r' && *(s+1) == '\n') 2008 s++; 2009 if (*s != '\n') { 2010 if ((c1 = hex2num(*s)) == -1) break; 2011 if (++s == send) break; 2012 if ((c2 = hex2num(*s)) == -1) break; 2013 *ptr++ = castchar(c1 << 4 | c2); 2014 } 2015 } 2016 else { 2017 *ptr++ = *s; 2018 } 2019 s++; 2020 ss = s; 2021 } 2022 rb_str_set_len(buf, ptr - RSTRING_PTR(buf)); 2023 rb_str_buf_cat(buf, ss, send-ss); 2024 ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), ENC_CODERANGE_VALID); 2025 UNPACK_PUSH(buf); 2026 } 2027 break; 2028 2029 case '@': 2030 if (len > RSTRING_LEN(str)) 2031 rb_raise(rb_eArgError, "@ outside of string"); 2032 s = RSTRING_PTR(str) + len; 2033 break; 2034 2035 case 'X': 2036 if (len > s - RSTRING_PTR(str)) 2037 rb_raise(rb_eArgError, "X outside of string"); 2038 s -= len; 2039 break; 2040 2041 case 'x': 2042 if (len > send - s) 2043 rb_raise(rb_eArgError, "x outside of string"); 2044 s += len; 2045 break; 2046 2047 case 'P': 2048 if (sizeof(char *) <= (size_t)(send - s)) { 2049 VALUE tmp = Qnil; 2050 char *t; 2051 2052 memcpy(&t, s, sizeof(char *)); 2053 s += sizeof(char *); 2054 2055 if (t) { 2056 VALUE a, *p, *pend; 2057 2058 if (!(a = rb_str_associated(str))) { 2059 rb_raise(rb_eArgError, "no associated pointer"); 2060 } 2061 p = RARRAY_PTR(a); 2062 pend = p + RARRAY_LEN(a); 2063 while (p < pend) { 2064 if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) { 2065 if (len < RSTRING_LEN(*p)) { 2066 tmp = rb_tainted_str_new(t, len); 2067 rb_str_associate(tmp, a); 2068 } 2069 else { 2070 tmp = *p; 2071 } 2072 break; 2073 } 2074 p++; 2075 } 2076 if (p == pend) { 2077 rb_raise(rb_eArgError, "non associated pointer"); 2078 } 2079 } 2080 UNPACK_PUSH(tmp); 2081 } 2082 break; 2083 2084 case 'p': 2085 if (len > (long)((send - s) / sizeof(char *))) 2086 len = (send - s) / sizeof(char *); 2087 while (len-- > 0) { 2088 if ((size_t)(send - s) < sizeof(char *)) 2089 break; 2090 else { 2091 VALUE tmp = Qnil; 2092 char *t; 2093 2094 memcpy(&t, s, sizeof(char *)); 2095 s += sizeof(char *); 2096 2097 if (t) { 2098 VALUE a, *p, *pend; 2099 2100 if (!(a = rb_str_associated(str))) { 2101 rb_raise(rb_eArgError, "no associated pointer"); 2102 } 2103 p = RARRAY_PTR(a); 2104 pend = p + RARRAY_LEN(a); 2105 while (p < pend) { 2106 if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) { 2107 tmp = *p; 2108 break; 2109 } 2110 p++; 2111 } 2112 if (p == pend) { 2113 rb_raise(rb_eArgError, "non associated pointer"); 2114 } 2115 } 2116 UNPACK_PUSH(tmp); 2117 } 2118 } 2119 break; 2120 2121 case 'w': 2122 { 2123 unsigned long ul = 0; 2124 unsigned long ulmask = 0xfeUL << ((sizeof(unsigned long) - 1) * 8); 2125 2126 while (len > 0 && s < send) { 2127 ul <<= 7; 2128 ul |= (*s & 0x7f); 2129 if (!(*s++ & 0x80)) { 2130 UNPACK_PUSH(ULONG2NUM(ul)); 2131 len--; 2132 ul = 0; 2133 } 2134 else if (ul & ulmask) { 2135 VALUE big = rb_uint2big(ul); 2136 VALUE big128 = rb_uint2big(128); 2137 while (s < send) { 2138 big = rb_big_mul(big, big128); 2139 big = rb_big_plus(big, rb_uint2big(*s & 0x7f)); 2140 if (!(*s++ & 0x80)) { 2141 UNPACK_PUSH(big); 2142 len--; 2143 ul = 0; 2144 break; 2145 } 2146 } 2147 } 2148 } 2149 } 2150 break; 2151 2152 default: 2153 rb_warning("unknown unpack directive '%c' in '%s'", 2154 type, RSTRING_PTR(fmt)); 2155 break; 2156 } 2157 } 2158 2159 return ary; 2160} 2161 2162#define BYTEWIDTH 8 2163 2164int 2165rb_uv_to_utf8(char buf[6], unsigned long uv) 2166{ 2167 if (uv <= 0x7f) { 2168 buf[0] = (char)uv; 2169 return 1; 2170 } 2171 if (uv <= 0x7ff) { 2172 buf[0] = castchar(((uv>>6)&0xff)|0xc0); 2173 buf[1] = castchar((uv&0x3f)|0x80); 2174 return 2; 2175 } 2176 if (uv <= 0xffff) { 2177 buf[0] = castchar(((uv>>12)&0xff)|0xe0); 2178 buf[1] = castchar(((uv>>6)&0x3f)|0x80); 2179 buf[2] = castchar((uv&0x3f)|0x80); 2180 return 3; 2181 } 2182 if (uv <= 0x1fffff) { 2183 buf[0] = castchar(((uv>>18)&0xff)|0xf0); 2184 buf[1] = castchar(((uv>>12)&0x3f)|0x80); 2185 buf[2] = castchar(((uv>>6)&0x3f)|0x80); 2186 buf[3] = castchar((uv&0x3f)|0x80); 2187 return 4; 2188 } 2189 if (uv <= 0x3ffffff) { 2190 buf[0] = castchar(((uv>>24)&0xff)|0xf8); 2191 buf[1] = castchar(((uv>>18)&0x3f)|0x80); 2192 buf[2] = castchar(((uv>>12)&0x3f)|0x80); 2193 buf[3] = castchar(((uv>>6)&0x3f)|0x80); 2194 buf[4] = castchar((uv&0x3f)|0x80); 2195 return 5; 2196 } 2197 if (uv <= 0x7fffffff) { 2198 buf[0] = castchar(((uv>>30)&0xff)|0xfc); 2199 buf[1] = castchar(((uv>>24)&0x3f)|0x80); 2200 buf[2] = castchar(((uv>>18)&0x3f)|0x80); 2201 buf[3] = castchar(((uv>>12)&0x3f)|0x80); 2202 buf[4] = castchar(((uv>>6)&0x3f)|0x80); 2203 buf[5] = castchar((uv&0x3f)|0x80); 2204 return 6; 2205 } 2206 rb_raise(rb_eRangeError, "pack(U): value out of range"); 2207 2208 UNREACHABLE; 2209} 2210 2211static const unsigned long utf8_limits[] = { 2212 0x0, /* 1 */ 2213 0x80, /* 2 */ 2214 0x800, /* 3 */ 2215 0x10000, /* 4 */ 2216 0x200000, /* 5 */ 2217 0x4000000, /* 6 */ 2218 0x80000000, /* 7 */ 2219}; 2220 2221static unsigned long 2222utf8_to_uv(const char *p, long *lenp) 2223{ 2224 int c = *p++ & 0xff; 2225 unsigned long uv = c; 2226 long n; 2227 2228 if (!(uv & 0x80)) { 2229 *lenp = 1; 2230 return uv; 2231 } 2232 if (!(uv & 0x40)) { 2233 *lenp = 1; 2234 rb_raise(rb_eArgError, "malformed UTF-8 character"); 2235 } 2236 2237 if (!(uv & 0x20)) { n = 2; uv &= 0x1f; } 2238 else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; } 2239 else if (!(uv & 0x08)) { n = 4; uv &= 0x07; } 2240 else if (!(uv & 0x04)) { n = 5; uv &= 0x03; } 2241 else if (!(uv & 0x02)) { n = 6; uv &= 0x01; } 2242 else { 2243 *lenp = 1; 2244 rb_raise(rb_eArgError, "malformed UTF-8 character"); 2245 } 2246 if (n > *lenp) { 2247 rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)", 2248 n, *lenp); 2249 } 2250 *lenp = n--; 2251 if (n != 0) { 2252 while (n--) { 2253 c = *p++ & 0xff; 2254 if ((c & 0xc0) != 0x80) { 2255 *lenp -= n + 1; 2256 rb_raise(rb_eArgError, "malformed UTF-8 character"); 2257 } 2258 else { 2259 c &= 0x3f; 2260 uv = uv << 6 | c; 2261 } 2262 } 2263 } 2264 n = *lenp - 1; 2265 if (uv < utf8_limits[n]) { 2266 rb_raise(rb_eArgError, "redundant UTF-8 sequence"); 2267 } 2268 return uv; 2269} 2270 2271void 2272Init_pack(void) 2273{ 2274 rb_define_method(rb_cArray, "pack", pack_pack, 1); 2275 rb_define_method(rb_cString, "unpack", pack_unpack, 1); 2276} 2277