1/**********************************************************************
2
3  pack.c -
4
5  $Author: eregon $
6  created at: Thu Feb 10 15:17:05 JST 1994
7
8  Copyright (C) 1993-2007 Yukihiro Matsumoto
9
10**********************************************************************/
11
12#include "ruby/ruby.h"
13#include "ruby/encoding.h"
14#include <sys/types.h>
15#include <ctype.h>
16#include <errno.h>
17
/*
 * GCC_VERSION_SINCE(major, minor, patchlevel) is non-zero when compiling
 * with GCC (and not the Intel compiler) at version major.minor.patchlevel
 * or newer, and 0 otherwise.
 *
 * The compiler check is performed by a real #if around the definition
 * rather than by a `defined' operator inside the macro body: a `defined'
 * that is produced by macro expansion within an #if expression has
 * undefined behavior (C11 6.10.1p4).
 */
#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
# define GCC_VERSION_SINCE(major, minor, patchlevel) \
  ((__GNUC__ > (major)) ||  \
   (__GNUC__ == (major) && __GNUC_MINOR__ > (minor)) || \
   (__GNUC__ == (major) && __GNUC_MINOR__ == (minor) && __GNUC_PATCHLEVEL__ >= (patchlevel)))
#else
# define GCC_VERSION_SINCE(major, minor, patchlevel) 0
#endif

/*
 * NATINT_PACK is defined when the native short or long differs from the
 * fixed 2-byte / 4-byte sizes, i.e. when a "native size" modifier can
 * actually change the width of a packed integer.
 */
#if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4
# define NATINT_PACK
#endif
26
#if defined(DYNAMIC_ENDIAN)
/*
 * Run-time byte order probe, for universal binaries of NEXTSTEP and
 * Mac OS X.  (Possibly useless since autoconf 2.63?)
 *
 * Returns 1 on a big-endian host and 0 on a little-endian one.  The
 * answer is computed on the first call and cached afterwards.
 */
static int
is_bigendian(void)
{
    static int probed = 0;	/* also the probe word: holds 1 once set */
    static int cached;

    if (!probed) {
	const char *first_byte;

	probed = 1;
	first_byte = (const char *)&probed;
	/* the lowest-addressed byte of the int 1 is non-zero only on little-endian */
	cached = (*first_byte != 0) ? 0 : 1;
    }
    return cached;
}
# define BIGENDIAN_P() (is_bigendian())
#elif defined(WORDS_BIGENDIAN)
# define BIGENDIAN_P() 1
#else
# define BIGENDIAN_P() 0
#endif
48
/*
 * NATINT_LEN(type, len): width in bytes of an integer directive.
 * Without NATINT_PACK it is always sizeof(type).  With NATINT_PACK it is
 * sizeof(type) when the local variable `natint' is set and the fixed
 * width `len' otherwise.
 */
#ifndef NATINT_PACK
# define NATINT_LEN(type,len) ((int)sizeof(type))
#else
# define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len))
#endif

/*
 * INT64toNUM / UINT64toNUM: convert a 64-bit C integer to a Ruby number
 * through whichever of long / long long is 8 bytes wide.  Neither macro
 * is defined when no such type is available.
 */
#if SIZEOF_LONG == 8
# define INT64toNUM(x)  LONG2NUM(x)
# define UINT64toNUM(x) ULONG2NUM(x)
#elif defined(HAVE_LONG_LONG) && SIZEOF_LONG_LONG == 8
# define INT64toNUM(x)  LL2NUM(x)
# define UINT64toNUM(x) ULL2NUM(x)
#endif
62
/*
 * define_swapx(x, xtype) defines `static xtype swap<x>(xtype z)', which
 * returns z with the order of its bytes reversed.  It is the generic
 * fallback for types that have no fixed-width swapNN() of matching size.
 *
 * The bytes of the argument are copied in reverse order straight into the
 * bytes of the result.  Both objects live on the stack and are accessed
 * through unsigned char pointers, so nothing is allocated.
 */
#define define_swapx(x, xtype)		\
static xtype				\
TOKEN_PASTE(swap,x)(xtype z)		\
{					\
    xtype r;				\
    const unsigned char *s = (const unsigned char *)&z;	\
    unsigned char *t = (unsigned char *)&r;	\
    size_t i;				\
					\
    for (i = 0; i < sizeof(xtype); i++) {	\
	t[sizeof(xtype)-i-1] = s[i];	\
    }					\
    return r;				\
}
84
85#if GCC_VERSION_SINCE(4,3,0)
86# define swap32(x) __builtin_bswap32(x)
87# define swap64(x) __builtin_bswap64(x)
88#endif
89
/*
 * Portable byte-swap fallbacks, used for every swapNN() that has not been
 * defined already.
 *
 * swap32() and swap64() convert their argument to the unsigned type of
 * the result before shifting.  The argument may be a signed integer, and
 * shifting a byte into the sign bit of a signed value is undefined
 * behavior, while right-shifting a negative value is implementation
 * defined.  Note that the argument is evaluated more than once.
 */
#ifndef swap16
# define swap16(x)	((uint16_t)((((x)&0xFF)<<8) | (((x)>>8)&0xFF)))
#endif

#ifndef swap32
# define swap32(x)	((uint32_t)((((uint32_t)(x)&0xFF)<<24)	\
			|(((uint32_t)(x)>>24)&0xFF)	\
			|(((uint32_t)(x)&0x0000FF00)<<8)	\
			|(((uint32_t)(x)&0x00FF0000)>>8)	))
#endif

#ifndef swap64
# ifdef HAVE_INT64_T
/* mask selecting the byte that starts at bit n of a 64-bit value */
#  define byte_in_64bit(n) ((uint64_t)0xff << (n))
#  define swap64(x)       ((uint64_t)((((uint64_t)(x)&byte_in_64bit(0))<<56) 	\
			   |(((uint64_t)(x)>>56)&0xFF)	                \
			   |(((uint64_t)(x)&byte_in_64bit(8))<<40)	\
			   |(((uint64_t)(x)&byte_in_64bit(48))>>40)	\
			   |(((uint64_t)(x)&byte_in_64bit(16))<<24)	\
			   |(((uint64_t)(x)&byte_in_64bit(40))>>24)	\
			   |(((uint64_t)(x)&byte_in_64bit(24))<<8)	\
			   |(((uint64_t)(x)&byte_in_64bit(32))>>8)))
# endif
#endif
114
115#if SIZEOF_SHORT == 2
116# define swaps(x)	swap16(x)
117#elif SIZEOF_SHORT == 4
118# define swaps(x)	swap32(x)
119#else
120  define_swapx(s,short)
121#endif
122
123#if SIZEOF_INT == 2
124# define swapi(x)	swap16(x)
125#elif SIZEOF_INT == 4
126# define swapi(x)	swap32(x)
127#else
128  define_swapx(i,int)
129#endif
130
131#if SIZEOF_LONG == 4
132# define swapl(x)	swap32(x)
133#elif SIZEOF_LONG == 8
134# define swapl(x)        swap64(x)
135#else
136  define_swapx(l,long)
137#endif
138
139#ifdef HAVE_LONG_LONG
140# if SIZEOF_LONG_LONG == 8
141#  define swapll(x)        swap64(x)
142# else
143   define_swapx(ll,LONG_LONG)
144# endif
145#endif
146
147#if SIZEOF_FLOAT == 4 && defined(HAVE_INT32_T)
148#   define swapf(x)	swap32(x)
149#   define FLOAT_SWAPPER	uint32_t
150#else
151    define_swapx(f,float)
152#endif
153
154#if SIZEOF_DOUBLE == 8 && defined(HAVE_INT64_T)
155#   define swapd(x)	swap64(x)
156#   define DOUBLE_SWAPPER	uint64_t
157#elif SIZEOF_DOUBLE == 8 && defined(HAVE_INT32_T)
158    static double
159    swapd(const double d)
160    {
161	double dtmp = d;
162	uint32_t utmp[2];
163	uint32_t utmp0;
164
165	utmp[0] = 0; utmp[1] = 0;
166	memcpy(utmp,&dtmp,sizeof(double));
167	utmp0 = utmp[0];
168	utmp[0] = swap32(utmp[1]);
169	utmp[1] = swap32(utmp0);
170	memcpy(&dtmp,utmp,sizeof(double));
171	return dtmp;
172    }
173#else
174    define_swapx(d, double)
175#endif
176
177#undef define_swapx
178
179#define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x))
180#define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x))
181#define rb_htonf(x) (BIGENDIAN_P()?(x):swapf(x))
182#define rb_htond(x) (BIGENDIAN_P()?(x):swapd(x))
183#define rb_htovf(x) (BIGENDIAN_P()?swapf(x):(x))
184#define rb_htovd(x) (BIGENDIAN_P()?swapd(x):(x))
185#define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x))
186#define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x))
187
/*
 * Byte-order conversions for float values.
 *
 * With FLOAT_SWAPPER, FLOAT_CONVWITH(y) declares the integer temporary y,
 * and each conversion copies the bits of x into y, converts y, copies the
 * result back into x and evaluates to x.  Without FLOAT_SWAPPER the
 * conversions apply rb_*f() to x directly and FLOAT_CONVWITH() declares
 * nothing.
 */
#ifdef FLOAT_SWAPPER
# define FLOAT_CONVWITH(y)	FLOAT_SWAPPER y;
# define FLOAT_CONV_VIA(conv,x,y)	(memcpy(&(y),&(x),sizeof(float)),	\
				 (y) = conv((FLOAT_SWAPPER)(y)),	\
				 memcpy(&(x),&(y),sizeof(float)),	\
				 (x))
# define HTONF(x,y)	FLOAT_CONV_VIA(rb_htonf,x,y)
# define HTOVF(x,y)	FLOAT_CONV_VIA(rb_htovf,x,y)
# define NTOHF(x,y)	FLOAT_CONV_VIA(rb_ntohf,x,y)
# define VTOHF(x,y)	FLOAT_CONV_VIA(rb_vtohf,x,y)
#else
# define FLOAT_CONVWITH(y)
# define HTONF(x,y)	rb_htonf(x)
# define HTOVF(x,y)	rb_htovf(x)
# define NTOHF(x,y)	rb_ntohf(x)
# define VTOHF(x,y)	rb_vtohf(x)
#endif

/*
 * Byte-order conversions for double values; same scheme as for float,
 * with DOUBLE_SWAPPER as the integer type of the temporary.
 */
#ifdef DOUBLE_SWAPPER
# define DOUBLE_CONVWITH(y)	DOUBLE_SWAPPER y;
# define DOUBLE_CONV_VIA(conv,x,y)	(memcpy(&(y),&(x),sizeof(double)),	\
				 (y) = conv((DOUBLE_SWAPPER)(y)),	\
				 memcpy(&(x),&(y),sizeof(double)),	\
				 (x))
# define HTOND(x,y)	DOUBLE_CONV_VIA(rb_htond,x,y)
# define HTOVD(x,y)	DOUBLE_CONV_VIA(rb_htovd,x,y)
# define NTOHD(x,y)	DOUBLE_CONV_VIA(rb_ntohd,x,y)
# define VTOHD(x,y)	DOUBLE_CONV_VIA(rb_vtohd,x,y)
#else
# define DOUBLE_CONVWITH(y)
# define HTOND(x,y)	rb_htond(x)
# define HTOVD(x,y)	rb_htovd(x)
# define NTOHD(x,y)	rb_ntohd(x)
# define VTOHD(x,y)	rb_vtohd(x)
#endif
239
240static unsigned long
241num2i32(VALUE x)
242{
243    x = rb_to_int(x); /* is nil OK? (should not) */
244
245    if (FIXNUM_P(x)) return FIX2LONG(x);
246    if (RB_TYPE_P(x, T_BIGNUM)) {
247	return rb_big2ulong_pack(x);
248    }
249    rb_raise(rb_eTypeError, "can't convert %s to `integer'", rb_obj_classname(x));
250
251    UNREACHABLE;
252}
253
254#define MAX_INTEGER_PACK_SIZE 8
255/* #define FORCE_BIG_PACK */
256
257static const char toofew[] = "too few arguments";
258
259static void encodes(VALUE,const char*,long,int,int);
260static void qpencode(VALUE,VALUE,long);
261
262static unsigned long utf8_to_uv(const char*,long*);
263
264/*
265 *  call-seq:
266 *     arr.pack ( aTemplateString ) -> aBinaryString
267 *
268 *  Packs the contents of <i>arr</i> into a binary sequence according to
 *  the directives in <i>aTemplateString</i> (see the table below).
270 *  Directives ``A,'' ``a,'' and ``Z'' may be followed by a count,
271 *  which gives the width of the resulting field. The remaining
272 *  directives also may take a count, indicating the number of array
273 *  elements to convert. If the count is an asterisk
274 *  (``<code>*</code>''), all remaining array elements will be
275 *  converted. Any of the directives ``<code>sSiIlL</code>'' may be
276 *  followed by an underscore (``<code>_</code>'') or
277 *  exclamation mark (``<code>!</code>'') to use the underlying
278 *  platform's native size for the specified type; otherwise, they use a
279 *  platform-independent size. Spaces are ignored in the template
280 *  string. See also <code>String#unpack</code>.
281 *
282 *     a = [ "a", "b", "c" ]
283 *     n = [ 65, 66, 67 ]
284 *     a.pack("A3A3A3")   #=> "a  b  c  "
285 *     a.pack("a3a3a3")   #=> "a\000\000b\000\000c\000\000"
286 *     n.pack("ccc")      #=> "ABC"
287 *
288 *  Directives for +pack+.
289 *
290 *   Integer      | Array   |
291 *   Directive    | Element | Meaning
292 *   ---------------------------------------------------------------------------
293 *      C         | Integer | 8-bit unsigned (unsigned char)
294 *      S         | Integer | 16-bit unsigned, native endian (uint16_t)
295 *      L         | Integer | 32-bit unsigned, native endian (uint32_t)
296 *      Q         | Integer | 64-bit unsigned, native endian (uint64_t)
297 *                |         |
298 *      c         | Integer | 8-bit signed (signed char)
299 *      s         | Integer | 16-bit signed, native endian (int16_t)
300 *      l         | Integer | 32-bit signed, native endian (int32_t)
301 *      q         | Integer | 64-bit signed, native endian (int64_t)
302 *                |         |
303 *      S_, S!    | Integer | unsigned short, native endian
304 *      I, I_, I! | Integer | unsigned int, native endian
305 *      L_, L!    | Integer | unsigned long, native endian
306 *                |         |
307 *      s_, s!    | Integer | signed short, native endian
308 *      i, i_, i! | Integer | signed int, native endian
309 *      l_, l!    | Integer | signed long, native endian
310 *                |         |
311 *      S> L> Q>  | Integer | same as the directives without ">" except
312 *      s> l> q>  |         | big endian
313 *      S!> I!>   |         | (available since Ruby 1.9.3)
314 *      L!>       |         | "S>" is same as "n"
315 *      s!> i!>   |         | "L>" is same as "N"
316 *      l!>       |         |
317 *                |         |
318 *      S< L< Q<  | Integer | same as the directives without "<" except
319 *      s< l< q<  |         | little endian
320 *      S!< I!<   |         | (available since Ruby 1.9.3)
321 *      L!<       |         | "S<" is same as "v"
322 *      s!< i!<   |         | "L<" is same as "V"
323 *      l!<       |         |
324 *                |         |
325 *      n         | Integer | 16-bit unsigned, network (big-endian) byte order
326 *      N         | Integer | 32-bit unsigned, network (big-endian) byte order
327 *      v         | Integer | 16-bit unsigned, VAX (little-endian) byte order
328 *      V         | Integer | 32-bit unsigned, VAX (little-endian) byte order
329 *                |         |
330 *      U         | Integer | UTF-8 character
331 *      w         | Integer | BER-compressed integer
332 *
333 *   Float        |         |
334 *   Directive    |         | Meaning
335 *   ---------------------------------------------------------------------------
336 *      D, d      | Float   | double-precision, native format
337 *      F, f      | Float   | single-precision, native format
338 *      E         | Float   | double-precision, little-endian byte order
339 *      e         | Float   | single-precision, little-endian byte order
340 *      G         | Float   | double-precision, network (big-endian) byte order
341 *      g         | Float   | single-precision, network (big-endian) byte order
342 *
343 *   String       |         |
344 *   Directive    |         | Meaning
345 *   ---------------------------------------------------------------------------
346 *      A         | String  | arbitrary binary string (space padded, count is width)
347 *      a         | String  | arbitrary binary string (null padded, count is width)
348 *      Z         | String  | same as ``a'', except that null is added with *
349 *      B         | String  | bit string (MSB first)
350 *      b         | String  | bit string (LSB first)
351 *      H         | String  | hex string (high nibble first)
352 *      h         | String  | hex string (low nibble first)
353 *      u         | String  | UU-encoded string
354 *      M         | String  | quoted printable, MIME encoding (see RFC2045)
355 *      m         | String  | base64 encoded string (see RFC 2045, count is width)
 *                |         | (if count is 0, no line feeds are added, see RFC 4648)
357 *      P         | String  | pointer to a structure (fixed-length string)
358 *      p         | String  | pointer to a null-terminated string
359 *
360 *   Misc.        |         |
361 *   Directive    |         | Meaning
362 *   ---------------------------------------------------------------------------
363 *      @         | ---     | moves to absolute position
364 *      X         | ---     | back up a byte
365 *      x         | ---     | null byte
366 */
367
368static VALUE
369pack_pack(VALUE ary, VALUE fmt)
370{
371    static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0";
372    static const char spc10[] = "          ";
373    const char *p, *pend;
374    VALUE res, from, associates = 0;
375    char type;
376    long items, len, idx, plen;
377    const char *ptr;
378    int enc_info = 1;		/* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */
379#ifdef NATINT_PACK
380    int natint;		/* native integer */
381#endif
382    int integer_size, bigendian_p;
383
384    StringValue(fmt);
385    p = RSTRING_PTR(fmt);
386    pend = p + RSTRING_LEN(fmt);
387    res = rb_str_buf_new(0);
388
389    items = RARRAY_LEN(ary);
390    idx = 0;
391
392#define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
393#define THISFROM (items > 0 ? RARRAY_PTR(ary)[idx] : TOO_FEW)
394#define NEXTFROM (items-- > 0 ? RARRAY_PTR(ary)[idx++] : TOO_FEW)
395
396    while (p < pend) {
397	int explicit_endian = 0;
398	if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) {
399	    rb_raise(rb_eRuntimeError, "format string modified");
400	}
401	type = *p++;		/* get data type */
402#ifdef NATINT_PACK
403	natint = 0;
404#endif
405
406	if (ISSPACE(type)) continue;
407	if (type == '#') {
408	    while ((p < pend) && (*p != '\n')) {
409		p++;
410	    }
411	    continue;
412	}
413
414	{
415	    static const char natstr[] = "sSiIlL";
416	    static const char endstr[] = "sSiIlLqQ";
417
418          modifiers:
419	    switch (*p) {
420	      case '_':
421	      case '!':
422		if (strchr(natstr, type)) {
423#ifdef NATINT_PACK
424		    natint = 1;
425#endif
426		    p++;
427		}
428		else {
429		    rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
430		}
431		goto modifiers;
432
433	      case '<':
434	      case '>':
435		if (!strchr(endstr, type)) {
436		    rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
437		}
438		if (explicit_endian) {
439		    rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
440		}
441		explicit_endian = *p++;
442		goto modifiers;
443	    }
444	}
445
446	if (*p == '*') {	/* set data length */
447	    len = strchr("@Xxu", type) ? 0
448                : strchr("PMm", type) ? 1
449                : items;
450	    p++;
451	}
452	else if (ISDIGIT(*p)) {
453	    errno = 0;
454	    len = STRTOUL(p, (char**)&p, 10);
455	    if (errno) {
456		rb_raise(rb_eRangeError, "pack length too big");
457	    }
458	}
459	else {
460	    len = 1;
461	}
462
463	switch (type) {
464	  case 'U':
465	    /* if encoding is US-ASCII, upgrade to UTF-8 */
466	    if (enc_info == 1) enc_info = 2;
467	    break;
468	  case 'm': case 'M': case 'u':
469	    /* keep US-ASCII (do nothing) */
470	    break;
471	  default:
472	    /* fall back to BINARY */
473	    enc_info = 0;
474	    break;
475	}
476	switch (type) {
477	  case 'A': case 'a': case 'Z':
478	  case 'B': case 'b':
479	  case 'H': case 'h':
480	    from = NEXTFROM;
481	    if (NIL_P(from)) {
482		ptr = "";
483		plen = 0;
484	    }
485	    else {
486		StringValue(from);
487		ptr = RSTRING_PTR(from);
488		plen = RSTRING_LEN(from);
489		OBJ_INFECT(res, from);
490	    }
491
492	    if (p[-1] == '*')
493		len = plen;
494
495	    switch (type) {
496	      case 'a':		/* arbitrary binary string (null padded)  */
497	      case 'A':         /* arbitrary binary string (ASCII space padded) */
498	      case 'Z':         /* null terminated string  */
499		if (plen >= len) {
500		    rb_str_buf_cat(res, ptr, len);
501		    if (p[-1] == '*' && type == 'Z')
502			rb_str_buf_cat(res, nul10, 1);
503		}
504		else {
505		    rb_str_buf_cat(res, ptr, plen);
506		    len -= plen;
507		    while (len >= 10) {
508			rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10);
509			len -= 10;
510		    }
511		    rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len);
512		}
513		break;
514
515#define castchar(from) (char)((from) & 0xff)
516
517	      case 'b':		/* bit string (ascending) */
518		{
519		    int byte = 0;
520		    long i, j = 0;
521
522		    if (len > plen) {
523			j = (len - plen + 1)/2;
524			len = plen;
525		    }
526		    for (i=0; i++ < len; ptr++) {
527			if (*ptr & 1)
528			    byte |= 128;
529			if (i & 7)
530			    byte >>= 1;
531			else {
532			    char c = castchar(byte);
533			    rb_str_buf_cat(res, &c, 1);
534			    byte = 0;
535			}
536		    }
537		    if (len & 7) {
538			char c;
539			byte >>= 7 - (len & 7);
540			c = castchar(byte);
541			rb_str_buf_cat(res, &c, 1);
542		    }
543		    len = j;
544		    goto grow;
545		}
546		break;
547
548	      case 'B':		/* bit string (descending) */
549		{
550		    int byte = 0;
551		    long i, j = 0;
552
553		    if (len > plen) {
554			j = (len - plen + 1)/2;
555			len = plen;
556		    }
557		    for (i=0; i++ < len; ptr++) {
558			byte |= *ptr & 1;
559			if (i & 7)
560			    byte <<= 1;
561			else {
562			    char c = castchar(byte);
563			    rb_str_buf_cat(res, &c, 1);
564			    byte = 0;
565			}
566		    }
567		    if (len & 7) {
568			char c;
569			byte <<= 7 - (len & 7);
570			c = castchar(byte);
571			rb_str_buf_cat(res, &c, 1);
572		    }
573		    len = j;
574		    goto grow;
575		}
576		break;
577
578	      case 'h':		/* hex string (low nibble first) */
579		{
580		    int byte = 0;
581		    long i, j = 0;
582
583		    if (len > plen) {
584			j = (len + 1) / 2 - (plen + 1) / 2;
585			len = plen;
586		    }
587		    for (i=0; i++ < len; ptr++) {
588			if (ISALPHA(*ptr))
589			    byte |= (((*ptr & 15) + 9) & 15) << 4;
590			else
591			    byte |= (*ptr & 15) << 4;
592			if (i & 1)
593			    byte >>= 4;
594			else {
595			    char c = castchar(byte);
596			    rb_str_buf_cat(res, &c, 1);
597			    byte = 0;
598			}
599		    }
600		    if (len & 1) {
601			char c = castchar(byte);
602			rb_str_buf_cat(res, &c, 1);
603		    }
604		    len = j;
605		    goto grow;
606		}
607		break;
608
609	      case 'H':		/* hex string (high nibble first) */
610		{
611		    int byte = 0;
612		    long i, j = 0;
613
614		    if (len > plen) {
615			j = (len + 1) / 2 - (plen + 1) / 2;
616			len = plen;
617		    }
618		    for (i=0; i++ < len; ptr++) {
619			if (ISALPHA(*ptr))
620			    byte |= ((*ptr & 15) + 9) & 15;
621			else
622			    byte |= *ptr & 15;
623			if (i & 1)
624			    byte <<= 4;
625			else {
626			    char c = castchar(byte);
627			    rb_str_buf_cat(res, &c, 1);
628			    byte = 0;
629			}
630		    }
631		    if (len & 1) {
632			char c = castchar(byte);
633			rb_str_buf_cat(res, &c, 1);
634		    }
635		    len = j;
636		    goto grow;
637		}
638		break;
639	    }
640	    break;
641
642	  case 'c':		/* signed char */
643	  case 'C':		/* unsigned char */
644	    while (len-- > 0) {
645		char c;
646
647		from = NEXTFROM;
648		c = (char)num2i32(from);
649		rb_str_buf_cat(res, &c, sizeof(char));
650	    }
651	    break;
652
653	  case 's':		/* signed short */
654            integer_size = NATINT_LEN(short, 2);
655            bigendian_p = BIGENDIAN_P();
656            goto pack_integer;
657
658	  case 'S':		/* unsigned short */
659            integer_size = NATINT_LEN(short, 2);
660            bigendian_p = BIGENDIAN_P();
661            goto pack_integer;
662
663	  case 'i':		/* signed int */
664            integer_size = (int)sizeof(int);
665            bigendian_p = BIGENDIAN_P();
666            goto pack_integer;
667
668	  case 'I':		/* unsigned int */
669            integer_size = (int)sizeof(int);
670            bigendian_p = BIGENDIAN_P();
671            goto pack_integer;
672
673	  case 'l':		/* signed long */
674            integer_size = NATINT_LEN(long, 4);
675            bigendian_p = BIGENDIAN_P();
676            goto pack_integer;
677
678	  case 'L':		/* unsigned long */
679            integer_size = NATINT_LEN(long, 4);
680            bigendian_p = BIGENDIAN_P();
681            goto pack_integer;
682
683	  case 'q':		/* signed quad (64bit) int */
684	    integer_size = 8;
685            bigendian_p = BIGENDIAN_P();
686            goto pack_integer;
687
688	  case 'Q':		/* unsigned quad (64bit) int */
689	    integer_size = 8;
690            bigendian_p = BIGENDIAN_P();
691            goto pack_integer;
692
693	  case 'n':		/* unsigned short (network byte-order)  */
694            integer_size = 2;
695            bigendian_p = 1;
696            goto pack_integer;
697
698	  case 'N':		/* unsigned long (network byte-order) */
699            integer_size = 4;
700            bigendian_p = 1;
701            goto pack_integer;
702
703	  case 'v':		/* unsigned short (VAX byte-order) */
704            integer_size = 2;
705            bigendian_p = 0;
706            goto pack_integer;
707
708	  case 'V':		/* unsigned long (VAX byte-order) */
709            integer_size = 4;
710            bigendian_p = 0;
711            goto pack_integer;
712
713          pack_integer:
714	    if (explicit_endian) {
715		bigendian_p = explicit_endian == '>';
716	    }
717
718            switch (integer_size) {
719#if defined(HAVE_INT16_T) && !defined(FORCE_BIG_PACK)
720              case SIZEOF_INT16_T:
721		while (len-- > 0) {
722                    union {
723                        int16_t i;
724                        char a[sizeof(int16_t)];
725                    } v;
726
727		    from = NEXTFROM;
728		    v.i = (int16_t)num2i32(from);
729		    if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i);
730		    rb_str_buf_cat(res, v.a, sizeof(int16_t));
731		}
732		break;
733#endif
734
735#if defined(HAVE_INT32_T) && !defined(FORCE_BIG_PACK)
736              case SIZEOF_INT32_T:
737		while (len-- > 0) {
738		    union {
739                        int32_t i;
740                        char a[sizeof(int32_t)];
741                    } v;
742
743		    from = NEXTFROM;
744		    v.i = (int32_t)num2i32(from);
745		    if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i);
746		    rb_str_buf_cat(res, v.a, sizeof(int32_t));
747		}
748		break;
749#endif
750
751#if defined(HAVE_INT64_T) && SIZEOF_LONG == SIZEOF_INT64_T && !defined(FORCE_BIG_PACK)
752              case SIZEOF_INT64_T:
753		while (len-- > 0) {
754		    union {
755                        int64_t i;
756                        char a[sizeof(int64_t)];
757                    } v;
758
759		    from = NEXTFROM;
760		    v.i = num2i32(from); /* can return 64bit value if SIZEOF_LONG == SIZEOF_INT64_T */
761		    if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i);
762		    rb_str_buf_cat(res, v.a, sizeof(int64_t));
763		}
764		break;
765#endif
766
767	      default:
768                if (integer_size > MAX_INTEGER_PACK_SIZE)
769                    rb_bug("unexpected intger size for pack: %d", integer_size);
770                while (len-- > 0) {
771                    union {
772                        unsigned long i[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG-1)/SIZEOF_LONG];
773                        char a[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG-1)/SIZEOF_LONG*SIZEOF_LONG];
774                    } v;
775                    int num_longs = (integer_size+SIZEOF_LONG-1)/SIZEOF_LONG;
776                    int i;
777
778                    from = NEXTFROM;
779                    rb_big_pack(from, v.i, num_longs);
780                    if (bigendian_p) {
781                        for (i = 0; i < num_longs/2; i++) {
782                            unsigned long t = v.i[i];
783                            v.i[i] = v.i[num_longs-1-i];
784                            v.i[num_longs-1-i] = t;
785                        }
786                    }
787		    if (bigendian_p != BIGENDIAN_P()) {
788                        for (i = 0; i < num_longs; i++)
789                            v.i[i] = swapl(v.i[i]);
790                    }
791                    rb_str_buf_cat(res,
792                                   bigendian_p ?
793                                     v.a + sizeof(long)*num_longs - integer_size :
794                                     v.a,
795                                   integer_size);
796                }
797                break;
798	    }
799	    break;
800
801	  case 'f':		/* single precision float in native format */
802	  case 'F':		/* ditto */
803	    while (len-- > 0) {
804		float f;
805
806		from = NEXTFROM;
807		f = (float)RFLOAT_VALUE(rb_to_float(from));
808		rb_str_buf_cat(res, (char*)&f, sizeof(float));
809	    }
810	    break;
811
812	  case 'e':		/* single precision float in VAX byte-order */
813	    while (len-- > 0) {
814		float f;
815		FLOAT_CONVWITH(ftmp);
816
817		from = NEXTFROM;
818		f = (float)RFLOAT_VALUE(rb_to_float(from));
819		f = HTOVF(f,ftmp);
820		rb_str_buf_cat(res, (char*)&f, sizeof(float));
821	    }
822	    break;
823
824	  case 'E':		/* double precision float in VAX byte-order */
825	    while (len-- > 0) {
826		double d;
827		DOUBLE_CONVWITH(dtmp);
828
829		from = NEXTFROM;
830		d = RFLOAT_VALUE(rb_to_float(from));
831		d = HTOVD(d,dtmp);
832		rb_str_buf_cat(res, (char*)&d, sizeof(double));
833	    }
834	    break;
835
836	  case 'd':		/* double precision float in native format */
837	  case 'D':		/* ditto */
838	    while (len-- > 0) {
839		double d;
840
841		from = NEXTFROM;
842		d = RFLOAT_VALUE(rb_to_float(from));
843		rb_str_buf_cat(res, (char*)&d, sizeof(double));
844	    }
845	    break;
846
847	  case 'g':		/* single precision float in network byte-order */
848	    while (len-- > 0) {
849		float f;
850		FLOAT_CONVWITH(ftmp);
851
852		from = NEXTFROM;
853		f = (float)RFLOAT_VALUE(rb_to_float(from));
854		f = HTONF(f,ftmp);
855		rb_str_buf_cat(res, (char*)&f, sizeof(float));
856	    }
857	    break;
858
859	  case 'G':		/* double precision float in network byte-order */
860	    while (len-- > 0) {
861		double d;
862		DOUBLE_CONVWITH(dtmp);
863
864		from = NEXTFROM;
865		d = RFLOAT_VALUE(rb_to_float(from));
866		d = HTOND(d,dtmp);
867		rb_str_buf_cat(res, (char*)&d, sizeof(double));
868	    }
869	    break;
870
871	  case 'x':		/* null byte */
872	  grow:
873	    while (len >= 10) {
874		rb_str_buf_cat(res, nul10, 10);
875		len -= 10;
876	    }
877	    rb_str_buf_cat(res, nul10, len);
878	    break;
879
880	  case 'X':		/* back up byte */
881	  shrink:
882	    plen = RSTRING_LEN(res);
883	    if (plen < len)
884		rb_raise(rb_eArgError, "X outside of string");
885	    rb_str_set_len(res, plen - len);
886	    break;
887
888	  case '@':		/* null fill to absolute position */
889	    len -= RSTRING_LEN(res);
890	    if (len > 0) goto grow;
891	    len = -len;
892	    if (len > 0) goto shrink;
893	    break;
894
895	  case '%':
896	    rb_raise(rb_eArgError, "%% is not supported");
897	    break;
898
899	  case 'U':		/* Unicode character */
900	    while (len-- > 0) {
901		SIGNED_VALUE l;
902		char buf[8];
903		int le;
904
905		from = NEXTFROM;
906		from = rb_to_int(from);
907		l = NUM2LONG(from);
908		if (l < 0) {
909		    rb_raise(rb_eRangeError, "pack(U): value out of range");
910		}
911		le = rb_uv_to_utf8(buf, l);
912		rb_str_buf_cat(res, (char*)buf, le);
913	    }
914	    break;
915
916	  case 'u':		/* uuencoded string */
917	  case 'm':		/* base64 encoded string */
918	    from = NEXTFROM;
919	    StringValue(from);
920	    ptr = RSTRING_PTR(from);
921	    plen = RSTRING_LEN(from);
922
923	    if (len == 0 && type == 'm') {
924		encodes(res, ptr, plen, type, 0);
925		ptr += plen;
926		break;
927	    }
928	    if (len <= 2)
929		len = 45;
930	    else if (len > 63 && type == 'u')
931		len = 63;
932	    else
933		len = len / 3 * 3;
934	    while (plen > 0) {
935		long todo;
936
937		if (plen > len)
938		    todo = len;
939		else
940		    todo = plen;
941		encodes(res, ptr, todo, type, 1);
942		plen -= todo;
943		ptr += todo;
944	    }
945	    break;
946
947	  case 'M':		/* quoted-printable encoded string */
948	    from = rb_obj_as_string(NEXTFROM);
949	    if (len <= 1)
950		len = 72;
951	    qpencode(res, from, len);
952	    break;
953
954	  case 'P':		/* pointer to packed byte string */
955	    from = THISFROM;
956	    if (!NIL_P(from)) {
957		StringValue(from);
958		if (RSTRING_LEN(from) < len) {
959		    rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)",
960			     RSTRING_LEN(from), len);
961		}
962	    }
963	    len = 1;
964	    /* FALL THROUGH */
965	  case 'p':		/* pointer to string */
966	    while (len-- > 0) {
967		char *t;
968		from = NEXTFROM;
969		if (NIL_P(from)) {
970		    t = 0;
971		}
972		else {
973		    t = StringValuePtr(from);
974		}
975		if (!associates) {
976		    associates = rb_ary_new();
977		}
978		rb_ary_push(associates, from);
979		rb_obj_taint(from);
980		rb_str_buf_cat(res, (char*)&t, sizeof(char*));
981	    }
982	    break;
983
984	  case 'w':		/* BER compressed integer  */
985	    while (len-- > 0) {
986		unsigned long ul;
987		VALUE buf = rb_str_new(0, 0);
988		char c, *bufs, *bufe;
989
990		from = NEXTFROM;
991		if (RB_TYPE_P(from, T_BIGNUM)) {
992		    VALUE big128 = rb_uint2big(128);
993		    while (RB_TYPE_P(from, T_BIGNUM)) {
994			from = rb_big_divmod(from, big128);
995			c = castchar(NUM2INT(RARRAY_PTR(from)[1]) | 0x80); /* mod */
996			rb_str_buf_cat(buf, &c, sizeof(char));
997			from = RARRAY_PTR(from)[0]; /* div */
998		    }
999		}
1000
1001		{
1002		    long l = NUM2LONG(from);
1003		    if (l < 0) {
1004			rb_raise(rb_eArgError, "can't compress negative numbers");
1005		    }
1006		    ul = l;
1007		}
1008
1009		while (ul) {
1010		    c = castchar((ul & 0x7f) | 0x80);
1011		    rb_str_buf_cat(buf, &c, sizeof(char));
1012		    ul >>=  7;
1013		}
1014
1015		if (RSTRING_LEN(buf)) {
1016		    bufs = RSTRING_PTR(buf);
1017		    bufe = bufs + RSTRING_LEN(buf) - 1;
1018		    *bufs &= 0x7f; /* clear continue bit */
1019		    while (bufs < bufe) { /* reverse */
1020			c = *bufs;
1021			*bufs++ = *bufe;
1022			*bufe-- = c;
1023		    }
1024		    rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf));
1025		}
1026		else {
1027		    c = 0;
1028		    rb_str_buf_cat(res, &c, sizeof(char));
1029		}
1030	    }
1031	    break;
1032
1033	  default:
1034	    rb_warning("unknown pack directive '%c' in '%s'",
1035		type, RSTRING_PTR(fmt));
1036	    break;
1037	}
1038    }
1039
1040    if (associates) {
1041	rb_str_associate(res, associates);
1042    }
1043    OBJ_INFECT(res, fmt);
1044    switch (enc_info) {
1045      case 1:
1046	ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
1047	break;
1048      case 2:
1049	rb_enc_set_index(res, rb_utf8_encindex());
1050	break;
1051      default:
1052	/* do nothing, keep ASCII-8BIT */
1053	break;
1054    }
1055    return res;
1056}
1057
1058static const char uu_table[] =
1059"`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
1060static const char b64_table[] =
1061"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1062
1063static void
1064encodes(VALUE str, const char *s, long len, int type, int tail_lf)
1065{
1066    char buff[4096];
1067    long i = 0;
1068    const char *trans = type == 'u' ? uu_table : b64_table;
1069    char padding;
1070
1071    if (type == 'u') {
1072	buff[i++] = (char)len + ' ';
1073	padding = '`';
1074    }
1075    else {
1076	padding = '=';
1077    }
1078    while (len >= 3) {
1079        while (len >= 3 && sizeof(buff)-i >= 4) {
1080            buff[i++] = trans[077 & (*s >> 2)];
1081            buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
1082            buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))];
1083            buff[i++] = trans[077 & s[2]];
1084            s += 3;
1085            len -= 3;
1086        }
1087        if (sizeof(buff)-i < 4) {
1088            rb_str_buf_cat(str, buff, i);
1089            i = 0;
1090        }
1091    }
1092
1093    if (len == 2) {
1094	buff[i++] = trans[077 & (*s >> 2)];
1095	buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
1096	buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))];
1097	buff[i++] = padding;
1098    }
1099    else if (len == 1) {
1100	buff[i++] = trans[077 & (*s >> 2)];
1101	buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))];
1102	buff[i++] = padding;
1103	buff[i++] = padding;
1104    }
1105    if (tail_lf) buff[i++] = '\n';
1106    rb_str_buf_cat(str, buff, i);
1107}
1108
1109static const char hex_table[] = "0123456789ABCDEF";
1110
1111static void
1112qpencode(VALUE str, VALUE from, long len)
1113{
1114    char buff[1024];
1115    long i = 0, n = 0, prev = EOF;
1116    unsigned char *s = (unsigned char*)RSTRING_PTR(from);
1117    unsigned char *send = s + RSTRING_LEN(from);
1118
1119    while (s < send) {
1120        if ((*s > 126) ||
1121	    (*s < 32 && *s != '\n' && *s != '\t') ||
1122	    (*s == '=')) {
1123	    buff[i++] = '=';
1124	    buff[i++] = hex_table[*s >> 4];
1125	    buff[i++] = hex_table[*s & 0x0f];
1126            n += 3;
1127            prev = EOF;
1128        }
1129	else if (*s == '\n') {
1130            if (prev == ' ' || prev == '\t') {
1131		buff[i++] = '=';
1132		buff[i++] = *s;
1133            }
1134	    buff[i++] = *s;
1135            n = 0;
1136            prev = *s;
1137        }
1138	else {
1139	    buff[i++] = *s;
1140            n++;
1141            prev = *s;
1142        }
1143        if (n > len) {
1144	    buff[i++] = '=';
1145	    buff[i++] = '\n';
1146            n = 0;
1147            prev = '\n';
1148        }
1149	if (i > 1024 - 5) {
1150	    rb_str_buf_cat(str, buff, i);
1151	    i = 0;
1152	}
1153	s++;
1154    }
1155    if (n > 0) {
1156	buff[i++] = '=';
1157	buff[i++] = '\n';
1158    }
1159    if (i > 0) {
1160	rb_str_buf_cat(str, buff, i);
1161    }
1162}
1163
/*
 * Return the value (0..15) of the hexadecimal digit c, accepting both
 * lower- and upper-case letters, or -1 if c is not a hexadecimal digit.
 */
static inline int
hex2num(char c)
{
    if (c >= '0' && c <= '9') {
	return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
	return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F') {
	return c - 'A' + 10;
    }
    return -1;
}
1181
/*
 * Clamp the repeat count of a fixed-width directive to the input that is
 * left.  Operates on the locals len, tmp_len, star, s and send of the
 * function it is expanded in.
 *
 * If fewer than len items of sz bytes remain between s and send, len is
 * reduced to the number of complete items available and, unless star is
 * set, tmp_len receives the number of items that are missing.  In every
 * other case tmp_len is 0.
 */
#define PACK_LENGTH_ADJUST_SIZE(sz) do {	\
    tmp_len = 0;				\
    if (len > (long)((send-s)/(sz))) {		\
        if (!star) {				\
	    tmp_len = len-(send-s)/(sz);		\
        }					\
	len = (send-s)/(sz);			\
    }						\
} while (0)
1191
/*
 * Companion of PACK_LENGTH_ADJUST_SIZE, using the locals tmp_len, block_p
 * and ary of the expanding function: when items were missing
 * (tmp_len > 0) and no block is in use, store Qnil at index
 * RARRAY_LEN(ary)+tmp_len-1 of the result array.
 * NOTE(review): presumably rb_ary_store grows the array and fills the gap
 * with nil, so this pads the result with tmp_len nils -- confirm against
 * rb_ary_store.
 */
#define PACK_ITEM_ADJUST() do { \
    if (tmp_len > 0 && !block_p) \
	rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
} while (0)
1196
1197static VALUE
1198infected_str_new(const char *ptr, long len, VALUE str)
1199{
1200    VALUE s = rb_str_new(ptr, len);
1201
1202    OBJ_INFECT(s, str);
1203    return s;
1204}
1205
1206/*
1207 *  call-seq:
1208 *     str.unpack(format)    ->  anArray
1209 *
1210 *  Decodes <i>str</i> (which may contain binary data) according to the
1211 *  format string, returning an array of each value extracted. The
1212 *  format string consists of a sequence of single-character directives,
1213 *  summarized in the table at the end of this entry.
1214 *  Each directive may be followed
1215 *  by a number, indicating the number of times to repeat with this
1216 *  directive. An asterisk (``<code>*</code>'') will use up all
1217 *  remaining elements. The directives <code>sSiIlL</code> may each be
1218 *  followed by an underscore (``<code>_</code>'') or
1219 *  exclamation mark (``<code>!</code>'') to use the underlying
1220 *  platform's native size for the specified type; otherwise, it uses a
1221 *  platform-independent consistent size. Spaces are ignored in the
1222 *  format string. See also <code>Array#pack</code>.
1223 *
1224 *     "abc \0\0abc \0\0".unpack('A6Z6')   #=> ["abc", "abc "]
1225 *     "abc \0\0".unpack('a3a3')           #=> ["abc", " \000\000"]
1226 *     "abc \0abc \0".unpack('Z*Z*')       #=> ["abc ", "abc "]
1227 *     "aa".unpack('b8B8')                 #=> ["10000110", "01100001"]
1228 *     "aaa".unpack('h2H2c')               #=> ["16", "61", 97]
1229 *     "\xfe\xff\xfe\xff".unpack('sS')     #=> [-2, 65534]
1230 *     "now=20is".unpack('M*')             #=> ["now is"]
1231 *     "whole".unpack('xax2aX2aX1aX2a')    #=> ["h", "e", "l", "l", "o"]
1232 *
1233 *  This table summarizes the various formats and the Ruby classes
1234 *  returned by each.
1235 *
1236 *   Integer      |         |
1237 *   Directive    | Returns | Meaning
1238 *   -----------------------------------------------------------------
1239 *      C         | Integer | 8-bit unsigned (unsigned char)
1240 *      S         | Integer | 16-bit unsigned, native endian (uint16_t)
1241 *      L         | Integer | 32-bit unsigned, native endian (uint32_t)
1242 *      Q         | Integer | 64-bit unsigned, native endian (uint64_t)
1243 *                |         |
1244 *      c         | Integer | 8-bit signed (signed char)
1245 *      s         | Integer | 16-bit signed, native endian (int16_t)
1246 *      l         | Integer | 32-bit signed, native endian (int32_t)
1247 *      q         | Integer | 64-bit signed, native endian (int64_t)
1248 *                |         |
1249 *      S_, S!    | Integer | unsigned short, native endian
1250 *      I, I_, I! | Integer | unsigned int, native endian
1251 *      L_, L!    | Integer | unsigned long, native endian
1252 *                |         |
1253 *      s_, s!    | Integer | signed short, native endian
1254 *      i, i_, i! | Integer | signed int, native endian
1255 *      l_, l!    | Integer | signed long, native endian
1256 *                |         |
1257 *      S> L> Q>  | Integer | same as the directives without ">" except
1258 *      s> l> q>  |         | big endian
1259 *      S!> I!>   |         | (available since Ruby 1.9.3)
1260 *      L!> Q!>   |         | "S>" is same as "n"
1261 *      s!> i!>   |         | "L>" is same as "N"
1262 *      l!> q!>   |         |
1263 *                |         |
1264 *      S< L< Q<  | Integer | same as the directives without "<" except
1265 *      s< l< q<  |         | little endian
1266 *      S!< I!<   |         | (available since Ruby 1.9.3)
1267 *      L!< Q!<   |         | "S<" is same as "v"
1268 *      s!< i!<   |         | "L<" is same as "V"
1269 *      l!< q!<   |         |
1270 *                |         |
1271 *      n         | Integer | 16-bit unsigned, network (big-endian) byte order
1272 *      N         | Integer | 32-bit unsigned, network (big-endian) byte order
1273 *      v         | Integer | 16-bit unsigned, VAX (little-endian) byte order
1274 *      V         | Integer | 32-bit unsigned, VAX (little-endian) byte order
1275 *                |         |
1276 *      U         | Integer | UTF-8 character
 *      w         | Integer | BER-compressed integer (see Array#pack)
1278 *
1279 *   Float        |         |
1280 *   Directive    | Returns | Meaning
1281 *   -----------------------------------------------------------------
1282 *      D, d      | Float   | double-precision, native format
1283 *      F, f      | Float   | single-precision, native format
1284 *      E         | Float   | double-precision, little-endian byte order
1285 *      e         | Float   | single-precision, little-endian byte order
1286 *      G         | Float   | double-precision, network (big-endian) byte order
1287 *      g         | Float   | single-precision, network (big-endian) byte order
1288 *
1289 *   String       |         |
1290 *   Directive    | Returns | Meaning
1291 *   -----------------------------------------------------------------
1292 *      A         | String  | arbitrary binary string (remove trailing nulls and ASCII spaces)
1293 *      a         | String  | arbitrary binary string
1294 *      Z         | String  | null-terminated string
1295 *      B         | String  | bit string (MSB first)
1296 *      b         | String  | bit string (LSB first)
1297 *      H         | String  | hex string (high nibble first)
1298 *      h         | String  | hex string (low nibble first)
1299 *      u         | String  | UU-encoded string
1300 *      M         | String  | quoted-printable, MIME encoding (see RFC2045)
1301 *      m         | String  | base64 encoded string (RFC 2045) (default)
1302 *                |         | base64 encoded string (RFC 4648) if followed by 0
1303 *      P         | String  | pointer to a structure (fixed-length string)
1304 *      p         | String  | pointer to a null-terminated string
1305 *
1306 *   Misc.        |         |
1307 *   Directive    | Returns | Meaning
1308 *   -----------------------------------------------------------------
1309 *      @         | ---     | skip to the offset given by the length argument
1310 *      X         | ---     | skip backward one byte
1311 *      x         | ---     | skip forward one byte
1312 */
1313
1314static VALUE
1315pack_unpack(VALUE str, VALUE fmt)
1316{
1317    static const char hexdigits[] = "0123456789abcdef";
1318    char *s, *send;
1319    char *p, *pend;
1320    VALUE ary;
1321    char type;
1322    long len, tmp_len;
1323    int star;
1324#ifdef NATINT_PACK
1325    int natint;			/* native integer */
1326#endif
1327    int block_p = rb_block_given_p();
1328    int signed_p, integer_size, bigendian_p;
1329#define UNPACK_PUSH(item) do {\
1330	VALUE item_val = (item);\
1331	if (block_p) {\
1332	    rb_yield(item_val);\
1333	}\
1334	else {\
1335	    rb_ary_push(ary, item_val);\
1336	}\
1337    } while (0)
1338
1339    StringValue(str);
1340    StringValue(fmt);
1341    s = RSTRING_PTR(str);
1342    send = s + RSTRING_LEN(str);
1343    p = RSTRING_PTR(fmt);
1344    pend = p + RSTRING_LEN(fmt);
1345
1346    ary = block_p ? Qnil : rb_ary_new();
1347    while (p < pend) {
1348	int explicit_endian = 0;
1349	type = *p++;
1350#ifdef NATINT_PACK
1351	natint = 0;
1352#endif
1353
1354	if (ISSPACE(type)) continue;
1355	if (type == '#') {
1356	    while ((p < pend) && (*p != '\n')) {
1357		p++;
1358	    }
1359	    continue;
1360	}
1361
1362	star = 0;
1363	{
1364	    static const char natstr[] = "sSiIlL";
1365	    static const char endstr[] = "sSiIlLqQ";
1366
1367          modifiers:
1368	    switch (*p) {
1369	      case '_':
1370	      case '!':
1371
1372		if (strchr(natstr, type)) {
1373#ifdef NATINT_PACK
1374		    natint = 1;
1375#endif
1376		    p++;
1377		}
1378		else {
1379		    rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
1380		}
1381		goto modifiers;
1382
1383	      case '<':
1384	      case '>':
1385		if (!strchr(endstr, type)) {
1386		    rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
1387		}
1388		if (explicit_endian) {
1389		    rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
1390		}
1391		explicit_endian = *p++;
1392		goto modifiers;
1393	    }
1394	}
1395
1396	if (p >= pend)
1397	    len = 1;
1398	else if (*p == '*') {
1399	    star = 1;
1400	    len = send - s;
1401	    p++;
1402	}
1403	else if (ISDIGIT(*p)) {
1404	    errno = 0;
1405	    len = STRTOUL(p, (char**)&p, 10);
1406	    if (errno) {
1407		rb_raise(rb_eRangeError, "pack length too big");
1408	    }
1409	}
1410	else {
1411	    len = (type != '@');
1412	}
1413
1414	switch (type) {
1415	  case '%':
1416	    rb_raise(rb_eArgError, "%% is not supported");
1417	    break;
1418
1419	  case 'A':
1420	    if (len > send - s) len = send - s;
1421	    {
1422		long end = len;
1423		char *t = s + len - 1;
1424
1425		while (t >= s) {
1426		    if (*t != ' ' && *t != '\0') break;
1427		    t--; len--;
1428		}
1429		UNPACK_PUSH(infected_str_new(s, len, str));
1430		s += end;
1431	    }
1432	    break;
1433
1434	  case 'Z':
1435	    {
1436		char *t = s;
1437
1438		if (len > send-s) len = send-s;
1439		while (t < s+len && *t) t++;
1440		UNPACK_PUSH(infected_str_new(s, t-s, str));
1441		if (t < send) t++;
1442		s = star ? t : s+len;
1443	    }
1444	    break;
1445
1446	  case 'a':
1447	    if (len > send - s) len = send - s;
1448	    UNPACK_PUSH(infected_str_new(s, len, str));
1449	    s += len;
1450	    break;
1451
1452	  case 'b':
1453	    {
1454		VALUE bitstr;
1455		char *t;
1456		int bits;
1457		long i;
1458
1459		if (p[-1] == '*' || len > (send - s) * 8)
1460		    len = (send - s) * 8;
1461		bits = 0;
1462		UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len));
1463		t = RSTRING_PTR(bitstr);
1464		for (i=0; i<len; i++) {
1465		    if (i & 7) bits >>= 1;
1466		    else bits = *s++;
1467		    *t++ = (bits & 1) ? '1' : '0';
1468		}
1469	    }
1470	    break;
1471
1472	  case 'B':
1473	    {
1474		VALUE bitstr;
1475		char *t;
1476		int bits;
1477		long i;
1478
1479		if (p[-1] == '*' || len > (send - s) * 8)
1480		    len = (send - s) * 8;
1481		bits = 0;
1482		UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len));
1483		t = RSTRING_PTR(bitstr);
1484		for (i=0; i<len; i++) {
1485		    if (i & 7) bits <<= 1;
1486		    else bits = *s++;
1487		    *t++ = (bits & 128) ? '1' : '0';
1488		}
1489	    }
1490	    break;
1491
1492	  case 'h':
1493	    {
1494		VALUE bitstr;
1495		char *t;
1496		int bits;
1497		long i;
1498
1499		if (p[-1] == '*' || len > (send - s) * 2)
1500		    len = (send - s) * 2;
1501		bits = 0;
1502		UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len));
1503		t = RSTRING_PTR(bitstr);
1504		for (i=0; i<len; i++) {
1505		    if (i & 1)
1506			bits >>= 4;
1507		    else
1508			bits = *s++;
1509		    *t++ = hexdigits[bits & 15];
1510		}
1511	    }
1512	    break;
1513
1514	  case 'H':
1515	    {
1516		VALUE bitstr;
1517		char *t;
1518		int bits;
1519		long i;
1520
1521		if (p[-1] == '*' || len > (send - s) * 2)
1522		    len = (send - s) * 2;
1523		bits = 0;
1524		UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len));
1525		t = RSTRING_PTR(bitstr);
1526		for (i=0; i<len; i++) {
1527		    if (i & 1)
1528			bits <<= 4;
1529		    else
1530			bits = *s++;
1531		    *t++ = hexdigits[(bits >> 4) & 15];
1532		}
1533	    }
1534	    break;
1535
1536	  case 'c':
1537	    PACK_LENGTH_ADJUST_SIZE(sizeof(char));
1538	    while (len-- > 0) {
1539                int c = *s++;
1540                if (c > (char)127) c-=256;
1541		UNPACK_PUSH(INT2FIX(c));
1542	    }
1543	    PACK_ITEM_ADJUST();
1544	    break;
1545
1546	  case 'C':
1547	    PACK_LENGTH_ADJUST_SIZE(sizeof(unsigned char));
1548	    while (len-- > 0) {
1549		unsigned char c = *s++;
1550		UNPACK_PUSH(INT2FIX(c));
1551	    }
1552	    PACK_ITEM_ADJUST();
1553	    break;
1554
1555	  case 's':
1556	    signed_p = 1;
1557	    integer_size = NATINT_LEN(short, 2);
1558	    bigendian_p = BIGENDIAN_P();
1559	    goto unpack_integer;
1560
1561	  case 'S':
1562	    signed_p = 0;
1563	    integer_size = NATINT_LEN(short, 2);
1564	    bigendian_p = BIGENDIAN_P();
1565	    goto unpack_integer;
1566
1567	  case 'i':
1568	    signed_p = 1;
1569	    integer_size = (int)sizeof(int);
1570	    bigendian_p = BIGENDIAN_P();
1571	    goto unpack_integer;
1572
1573	  case 'I':
1574	    signed_p = 0;
1575	    integer_size = (int)sizeof(int);
1576	    bigendian_p = BIGENDIAN_P();
1577	    goto unpack_integer;
1578
1579	  case 'l':
1580	    signed_p = 1;
1581	    integer_size = NATINT_LEN(long, 4);
1582	    bigendian_p = BIGENDIAN_P();
1583	    goto unpack_integer;
1584
1585	  case 'L':
1586	    signed_p = 0;
1587	    integer_size = NATINT_LEN(long, 4);
1588	    bigendian_p = BIGENDIAN_P();
1589	    goto unpack_integer;
1590
1591	  case 'q':
1592	    signed_p = 1;
1593	    integer_size = 8;
1594	    bigendian_p = BIGENDIAN_P();
1595	    goto unpack_integer;
1596
1597	  case 'Q':
1598	    signed_p = 0;
1599	    integer_size = 8;
1600	    bigendian_p = BIGENDIAN_P();
1601	    goto unpack_integer;
1602
1603	  case 'n':
1604	    signed_p = 0;
1605	    integer_size = 2;
1606	    bigendian_p = 1;
1607	    goto unpack_integer;
1608
1609	  case 'N':
1610	    signed_p = 0;
1611	    integer_size = 4;
1612	    bigendian_p = 1;
1613	    goto unpack_integer;
1614
1615	  case 'v':
1616	    signed_p = 0;
1617	    integer_size = 2;
1618	    bigendian_p = 0;
1619	    goto unpack_integer;
1620
1621	  case 'V':
1622	    signed_p = 0;
1623	    integer_size = 4;
1624	    bigendian_p = 0;
1625	    goto unpack_integer;
1626
1627	  unpack_integer:
1628	    if (explicit_endian) {
1629		bigendian_p = explicit_endian == '>';
1630	    }
1631
1632	    switch (integer_size) {
1633#if defined(HAVE_INT16_T) && !defined(FORCE_BIG_PACK)
1634	      case SIZEOF_INT16_T:
1635		if (signed_p) {
1636		    PACK_LENGTH_ADJUST_SIZE(sizeof(int16_t));
1637		    while (len-- > 0) {
1638			union {
1639                            int16_t i;
1640                            char a[sizeof(int16_t)];
1641                        } v;
1642			memcpy(v.a, s, sizeof(int16_t));
1643			if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i);
1644			s += sizeof(int16_t);
1645			UNPACK_PUSH(INT2FIX(v.i));
1646		    }
1647		    PACK_ITEM_ADJUST();
1648		}
1649		else {
1650		    PACK_LENGTH_ADJUST_SIZE(sizeof(uint16_t));
1651		    while (len-- > 0) {
1652			union {
1653                            uint16_t i;
1654                            char a[sizeof(uint16_t)];
1655                        } v;
1656			memcpy(v.a, s, sizeof(uint16_t));
1657			if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i);
1658			s += sizeof(uint16_t);
1659			UNPACK_PUSH(INT2FIX(v.i));
1660		    }
1661		    PACK_ITEM_ADJUST();
1662		}
1663		break;
1664#endif
1665
1666#if defined(HAVE_INT32_T) && !defined(FORCE_BIG_PACK)
1667	      case SIZEOF_INT32_T:
1668		if (signed_p) {
1669		    PACK_LENGTH_ADJUST_SIZE(sizeof(int32_t));
1670		    while (len-- > 0) {
1671			union {
1672                            int32_t i;
1673                            char a[sizeof(int32_t)];
1674                        } v;
1675			memcpy(v.a, s, sizeof(int32_t));
1676			if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i);
1677			s += sizeof(int32_t);
1678			UNPACK_PUSH(INT2NUM(v.i));
1679		    }
1680		    PACK_ITEM_ADJUST();
1681		}
1682		else {
1683		    PACK_LENGTH_ADJUST_SIZE(sizeof(uint32_t));
1684		    while (len-- > 0) {
1685			union {
1686                            uint32_t i;
1687                            char a[sizeof(uint32_t)];
1688                        } v;
1689			memcpy(v.a, s, sizeof(uint32_t));
1690			if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i);
1691			s += sizeof(uint32_t);
1692			UNPACK_PUSH(UINT2NUM(v.i));
1693		    }
1694		    PACK_ITEM_ADJUST();
1695		}
1696		break;
1697#endif
1698
1699#if defined(HAVE_INT64_T) && !defined(FORCE_BIG_PACK)
1700	      case SIZEOF_INT64_T:
1701		if (signed_p) {
1702		    PACK_LENGTH_ADJUST_SIZE(sizeof(int64_t));
1703		    while (len-- > 0) {
1704			union {
1705                            int64_t i;
1706                            char a[sizeof(int64_t)];
1707                        } v;
1708			memcpy(v.a, s, sizeof(int64_t));
1709			if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i);
1710			s += sizeof(int64_t);
1711			UNPACK_PUSH(INT64toNUM(v.i));
1712		    }
1713		    PACK_ITEM_ADJUST();
1714		}
1715		else {
1716		    PACK_LENGTH_ADJUST_SIZE(sizeof(uint64_t));
1717		    while (len-- > 0) {
1718			union {
1719                            uint64_t i;
1720                            char a[sizeof(uint64_t)];
1721                        } v;
1722			memcpy(v.a, s, sizeof(uint64_t));
1723			if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i);
1724			s += sizeof(uint64_t);
1725			UNPACK_PUSH(UINT64toNUM(v.i));
1726		    }
1727		    PACK_ITEM_ADJUST();
1728		}
1729		break;
1730#endif
1731
1732              default:
1733                if (integer_size > MAX_INTEGER_PACK_SIZE)
1734                    rb_bug("unexpected integer size for pack: %d", integer_size);
1735                PACK_LENGTH_ADJUST_SIZE(integer_size);
1736                while (len-- > 0) {
1737                    union {
1738                        unsigned long i[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG)/SIZEOF_LONG];
1739                        char a[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG)/SIZEOF_LONG*SIZEOF_LONG];
1740                    } v;
1741                    int num_longs = (integer_size+SIZEOF_LONG)/SIZEOF_LONG;
1742                    int i;
1743
1744                    if (signed_p && (signed char)s[bigendian_p ? 0 : (integer_size-1)] < 0)
1745                        memset(v.a, 0xff, sizeof(long)*num_longs);
1746                    else
1747                        memset(v.a, 0, sizeof(long)*num_longs);
1748                    if (bigendian_p)
1749                        memcpy(v.a + sizeof(long)*num_longs - integer_size, s, integer_size);
1750                    else
1751                        memcpy(v.a, s, integer_size);
1752                    if (bigendian_p) {
1753                        for (i = 0; i < num_longs/2; i++) {
1754                            unsigned long t = v.i[i];
1755                            v.i[i] = v.i[num_longs-1-i];
1756                            v.i[num_longs-1-i] = t;
1757                        }
1758                    }
1759                    if (bigendian_p != BIGENDIAN_P()) {
1760                        for (i = 0; i < num_longs; i++)
1761                            v.i[i] = swapl(v.i[i]);
1762                    }
1763                    s += integer_size;
1764                    UNPACK_PUSH(rb_big_unpack(v.i, num_longs));
1765                }
1766                PACK_ITEM_ADJUST();
1767		break;
1768	    }
1769            break;
1770
1771	  case 'f':
1772	  case 'F':
1773	    PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1774	    while (len-- > 0) {
1775		float tmp;
1776		memcpy(&tmp, s, sizeof(float));
1777		s += sizeof(float);
1778		UNPACK_PUSH(DBL2NUM((double)tmp));
1779	    }
1780	    PACK_ITEM_ADJUST();
1781	    break;
1782
1783	  case 'e':
1784	    PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1785	    while (len-- > 0) {
1786	        float tmp;
1787		FLOAT_CONVWITH(ftmp);
1788
1789		memcpy(&tmp, s, sizeof(float));
1790		s += sizeof(float);
1791		tmp = VTOHF(tmp,ftmp);
1792		UNPACK_PUSH(DBL2NUM((double)tmp));
1793	    }
1794	    PACK_ITEM_ADJUST();
1795	    break;
1796
1797	  case 'E':
1798	    PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1799	    while (len-- > 0) {
1800		double tmp;
1801		DOUBLE_CONVWITH(dtmp);
1802
1803		memcpy(&tmp, s, sizeof(double));
1804		s += sizeof(double);
1805		tmp = VTOHD(tmp,dtmp);
1806		UNPACK_PUSH(DBL2NUM(tmp));
1807	    }
1808	    PACK_ITEM_ADJUST();
1809	    break;
1810
1811	  case 'D':
1812	  case 'd':
1813	    PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1814	    while (len-- > 0) {
1815		double tmp;
1816		memcpy(&tmp, s, sizeof(double));
1817		s += sizeof(double);
1818		UNPACK_PUSH(DBL2NUM(tmp));
1819	    }
1820	    PACK_ITEM_ADJUST();
1821	    break;
1822
1823	  case 'g':
1824	    PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1825	    while (len-- > 0) {
1826	        float tmp;
1827		FLOAT_CONVWITH(ftmp);
1828
1829		memcpy(&tmp, s, sizeof(float));
1830		s += sizeof(float);
1831		tmp = NTOHF(tmp,ftmp);
1832		UNPACK_PUSH(DBL2NUM((double)tmp));
1833	    }
1834	    PACK_ITEM_ADJUST();
1835	    break;
1836
1837	  case 'G':
1838	    PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1839	    while (len-- > 0) {
1840		double tmp;
1841		DOUBLE_CONVWITH(dtmp);
1842
1843		memcpy(&tmp, s, sizeof(double));
1844		s += sizeof(double);
1845		tmp = NTOHD(tmp,dtmp);
1846		UNPACK_PUSH(DBL2NUM(tmp));
1847	    }
1848	    PACK_ITEM_ADJUST();
1849	    break;
1850
1851	  case 'U':
1852	    if (len > send - s) len = send - s;
1853	    while (len > 0 && s < send) {
1854		long alen = send - s;
1855		unsigned long l;
1856
1857		l = utf8_to_uv(s, &alen);
1858		s += alen; len--;
1859		UNPACK_PUSH(ULONG2NUM(l));
1860	    }
1861	    break;
1862
1863	  case 'u':
1864	    {
1865		VALUE buf = infected_str_new(0, (send - s)*3/4, str);
1866		char *ptr = RSTRING_PTR(buf);
1867		long total = 0;
1868
1869		while (s < send && *s > ' ' && *s < 'a') {
1870		    long a,b,c,d;
1871		    char hunk[4];
1872
1873		    hunk[3] = '\0';
1874		    len = (*s++ - ' ') & 077;
1875		    total += len;
1876		    if (total > RSTRING_LEN(buf)) {
1877			len -= total - RSTRING_LEN(buf);
1878			total = RSTRING_LEN(buf);
1879		    }
1880
1881		    while (len > 0) {
1882			long mlen = len > 3 ? 3 : len;
1883
1884			if (s < send && *s >= ' ')
1885			    a = (*s++ - ' ') & 077;
1886			else
1887			    a = 0;
1888			if (s < send && *s >= ' ')
1889			    b = (*s++ - ' ') & 077;
1890			else
1891			    b = 0;
1892			if (s < send && *s >= ' ')
1893			    c = (*s++ - ' ') & 077;
1894			else
1895			    c = 0;
1896			if (s < send && *s >= ' ')
1897			    d = (*s++ - ' ') & 077;
1898			else
1899			    d = 0;
1900			hunk[0] = (char)(a << 2 | b >> 4);
1901			hunk[1] = (char)(b << 4 | c >> 2);
1902			hunk[2] = (char)(c << 6 | d);
1903			memcpy(ptr, hunk, mlen);
1904			ptr += mlen;
1905			len -= mlen;
1906		    }
1907		    if (*s == '\r') s++;
1908		    if (*s == '\n') s++;
1909		    else if (s < send && (s+1 == send || s[1] == '\n'))
1910			s += 2;	/* possible checksum byte */
1911		}
1912
1913		rb_str_set_len(buf, total);
1914		UNPACK_PUSH(buf);
1915	    }
1916	    break;
1917
1918	  case 'm':
1919	    {
1920		VALUE buf = infected_str_new(0, (send - s)*3/4, str);
1921		char *ptr = RSTRING_PTR(buf);
1922		int a = -1,b = -1,c = 0,d = 0;
1923		static signed char b64_xtable[256];
1924
1925		if (b64_xtable['/'] <= 0) {
1926		    int i;
1927
1928		    for (i = 0; i < 256; i++) {
1929			b64_xtable[i] = -1;
1930		    }
1931		    for (i = 0; i < 64; i++) {
1932			b64_xtable[(unsigned char)b64_table[i]] = (char)i;
1933		    }
1934		}
1935		if (len == 0) {
1936		    while (s < send) {
1937			a = b = c = d = -1;
1938			a = b64_xtable[(unsigned char)*s++];
1939			if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64");
1940			b = b64_xtable[(unsigned char)*s++];
1941			if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64");
1942			if (*s == '=') {
1943			    if (s + 2 == send && *(s + 1) == '=') break;
1944			    rb_raise(rb_eArgError, "invalid base64");
1945			}
1946			c = b64_xtable[(unsigned char)*s++];
1947			if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64");
1948			if (s + 1 == send && *s == '=') break;
1949			d = b64_xtable[(unsigned char)*s++];
1950			if (d == -1) rb_raise(rb_eArgError, "invalid base64");
1951			*ptr++ = castchar(a << 2 | b >> 4);
1952			*ptr++ = castchar(b << 4 | c >> 2);
1953			*ptr++ = castchar(c << 6 | d);
1954		    }
1955		    if (c == -1) {
1956			*ptr++ = castchar(a << 2 | b >> 4);
1957			if (b & 0xf) rb_raise(rb_eArgError, "invalid base64");
1958		    }
1959		    else if (d == -1) {
1960			*ptr++ = castchar(a << 2 | b >> 4);
1961			*ptr++ = castchar(b << 4 | c >> 2);
1962			if (c & 0x3) rb_raise(rb_eArgError, "invalid base64");
1963		    }
1964		}
1965		else {
1966		    while (s < send) {
1967			a = b = c = d = -1;
1968			while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1969			if (s >= send) break;
1970			s++;
1971			while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1972			if (s >= send) break;
1973			s++;
1974			while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1975			if (*s == '=' || s >= send) break;
1976			s++;
1977			while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1978			if (*s == '=' || s >= send) break;
1979			s++;
1980			*ptr++ = castchar(a << 2 | b >> 4);
1981			*ptr++ = castchar(b << 4 | c >> 2);
1982			*ptr++ = castchar(c << 6 | d);
1983		    }
1984		    if (a != -1 && b != -1) {
1985			if (c == -1 && *s == '=')
1986			    *ptr++ = castchar(a << 2 | b >> 4);
1987			else if (c != -1 && *s == '=') {
1988			    *ptr++ = castchar(a << 2 | b >> 4);
1989			    *ptr++ = castchar(b << 4 | c >> 2);
1990			}
1991		    }
1992		}
1993		rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
1994		UNPACK_PUSH(buf);
1995	    }
1996	    break;
1997
1998	  case 'M':
1999	    {
2000		VALUE buf = infected_str_new(0, send - s, str);
2001		char *ptr = RSTRING_PTR(buf), *ss = s;
2002		int c1, c2;
2003
2004		while (s < send) {
2005		    if (*s == '=') {
2006			if (++s == send) break;
2007			if (s+1 < send && *s == '\r' && *(s+1) == '\n')
2008			    s++;
2009			if (*s != '\n') {
2010			    if ((c1 = hex2num(*s)) == -1) break;
2011			    if (++s == send) break;
2012			    if ((c2 = hex2num(*s)) == -1) break;
2013			    *ptr++ = castchar(c1 << 4 | c2);
2014			}
2015		    }
2016		    else {
2017			*ptr++ = *s;
2018		    }
2019		    s++;
2020		    ss = s;
2021		}
2022		rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
2023		rb_str_buf_cat(buf, ss, send-ss);
2024		ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), ENC_CODERANGE_VALID);
2025		UNPACK_PUSH(buf);
2026	    }
2027	    break;
2028
2029	  case '@':
2030	    if (len > RSTRING_LEN(str))
2031		rb_raise(rb_eArgError, "@ outside of string");
2032	    s = RSTRING_PTR(str) + len;
2033	    break;
2034
2035	  case 'X':
2036	    if (len > s - RSTRING_PTR(str))
2037		rb_raise(rb_eArgError, "X outside of string");
2038	    s -= len;
2039	    break;
2040
2041	  case 'x':
2042	    if (len > send - s)
2043		rb_raise(rb_eArgError, "x outside of string");
2044	    s += len;
2045	    break;
2046
2047	  case 'P':
2048	    if (sizeof(char *) <= (size_t)(send - s)) {
2049		VALUE tmp = Qnil;
2050		char *t;
2051
2052		memcpy(&t, s, sizeof(char *));
2053		s += sizeof(char *);
2054
2055		if (t) {
2056		    VALUE a, *p, *pend;
2057
2058		    if (!(a = rb_str_associated(str))) {
2059			rb_raise(rb_eArgError, "no associated pointer");
2060		    }
2061		    p = RARRAY_PTR(a);
2062		    pend = p + RARRAY_LEN(a);
2063		    while (p < pend) {
2064			if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
2065			    if (len < RSTRING_LEN(*p)) {
2066				tmp = rb_tainted_str_new(t, len);
2067				rb_str_associate(tmp, a);
2068			    }
2069			    else {
2070				tmp = *p;
2071			    }
2072			    break;
2073			}
2074			p++;
2075		    }
2076		    if (p == pend) {
2077			rb_raise(rb_eArgError, "non associated pointer");
2078		    }
2079		}
2080		UNPACK_PUSH(tmp);
2081	    }
2082	    break;
2083
2084	  case 'p':
2085	    if (len > (long)((send - s) / sizeof(char *)))
2086		len = (send - s) / sizeof(char *);
2087	    while (len-- > 0) {
2088		if ((size_t)(send - s) < sizeof(char *))
2089		    break;
2090		else {
2091		    VALUE tmp = Qnil;
2092		    char *t;
2093
2094		    memcpy(&t, s, sizeof(char *));
2095		    s += sizeof(char *);
2096
2097		    if (t) {
2098			VALUE a, *p, *pend;
2099
2100			if (!(a = rb_str_associated(str))) {
2101			    rb_raise(rb_eArgError, "no associated pointer");
2102			}
2103			p = RARRAY_PTR(a);
2104			pend = p + RARRAY_LEN(a);
2105			while (p < pend) {
2106			    if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
2107				tmp = *p;
2108				break;
2109			    }
2110			    p++;
2111			}
2112			if (p == pend) {
2113			    rb_raise(rb_eArgError, "non associated pointer");
2114			}
2115		    }
2116		    UNPACK_PUSH(tmp);
2117		}
2118	    }
2119	    break;
2120
2121	  case 'w':
2122	    {
2123		unsigned long ul = 0;
2124		unsigned long ulmask = 0xfeUL << ((sizeof(unsigned long) - 1) * 8);
2125
2126		while (len > 0 && s < send) {
2127		    ul <<= 7;
2128		    ul |= (*s & 0x7f);
2129		    if (!(*s++ & 0x80)) {
2130			UNPACK_PUSH(ULONG2NUM(ul));
2131			len--;
2132			ul = 0;
2133		    }
2134		    else if (ul & ulmask) {
2135			VALUE big = rb_uint2big(ul);
2136			VALUE big128 = rb_uint2big(128);
2137			while (s < send) {
2138			    big = rb_big_mul(big, big128);
2139			    big = rb_big_plus(big, rb_uint2big(*s & 0x7f));
2140			    if (!(*s++ & 0x80)) {
2141				UNPACK_PUSH(big);
2142				len--;
2143				ul = 0;
2144				break;
2145			    }
2146			}
2147		    }
2148		}
2149	    }
2150	    break;
2151
2152	  default:
2153	    rb_warning("unknown unpack directive '%c' in '%s'",
2154		type, RSTRING_PTR(fmt));
2155	    break;
2156	}
2157    }
2158
2159    return ary;
2160}
2161
/* Presumably the number of bits in a byte.
 * NOTE(review): no use of this macro is visible in this part of the file --
 * confirm it is still referenced before relying on or removing it. */
#define BYTEWIDTH 8
2163
2164int
2165rb_uv_to_utf8(char buf[6], unsigned long uv)
2166{
2167    if (uv <= 0x7f) {
2168	buf[0] = (char)uv;
2169	return 1;
2170    }
2171    if (uv <= 0x7ff) {
2172	buf[0] = castchar(((uv>>6)&0xff)|0xc0);
2173	buf[1] = castchar((uv&0x3f)|0x80);
2174	return 2;
2175    }
2176    if (uv <= 0xffff) {
2177	buf[0] = castchar(((uv>>12)&0xff)|0xe0);
2178	buf[1] = castchar(((uv>>6)&0x3f)|0x80);
2179	buf[2] = castchar((uv&0x3f)|0x80);
2180	return 3;
2181    }
2182    if (uv <= 0x1fffff) {
2183	buf[0] = castchar(((uv>>18)&0xff)|0xf0);
2184	buf[1] = castchar(((uv>>12)&0x3f)|0x80);
2185	buf[2] = castchar(((uv>>6)&0x3f)|0x80);
2186	buf[3] = castchar((uv&0x3f)|0x80);
2187	return 4;
2188    }
2189    if (uv <= 0x3ffffff) {
2190	buf[0] = castchar(((uv>>24)&0xff)|0xf8);
2191	buf[1] = castchar(((uv>>18)&0x3f)|0x80);
2192	buf[2] = castchar(((uv>>12)&0x3f)|0x80);
2193	buf[3] = castchar(((uv>>6)&0x3f)|0x80);
2194	buf[4] = castchar((uv&0x3f)|0x80);
2195	return 5;
2196    }
2197    if (uv <= 0x7fffffff) {
2198	buf[0] = castchar(((uv>>30)&0xff)|0xfc);
2199	buf[1] = castchar(((uv>>24)&0x3f)|0x80);
2200	buf[2] = castchar(((uv>>18)&0x3f)|0x80);
2201	buf[3] = castchar(((uv>>12)&0x3f)|0x80);
2202	buf[4] = castchar(((uv>>6)&0x3f)|0x80);
2203	buf[5] = castchar((uv&0x3f)|0x80);
2204	return 6;
2205    }
2206    rb_raise(rb_eRangeError, "pack(U): value out of range");
2207
2208    UNREACHABLE;
2209}
2210
/*
 * Lower bounds per encoded length: entry [len - 1] is the smallest value
 * that actually requires a sequence of +len+ bytes.  utf8_to_uv() compares
 * a decoded value against the entry for its sequence length to reject
 * over-long ("redundant") encodings.
 */
static const unsigned long utf8_limits[] = {
    0UL,			/* 1 byte:  0x0        */
    1UL << 7,			/* 2 bytes: 0x80       */
    1UL << 11,			/* 3 bytes: 0x800      */
    1UL << 16,			/* 4 bytes: 0x10000    */
    1UL << 21,			/* 5 bytes: 0x200000   */
    1UL << 26,			/* 6 bytes: 0x4000000  */
    1UL << 31,			/* 7 bytes: 0x80000000 */
};
2220
2221static unsigned long
2222utf8_to_uv(const char *p, long *lenp)
2223{
2224    int c = *p++ & 0xff;
2225    unsigned long uv = c;
2226    long n;
2227
2228    if (!(uv & 0x80)) {
2229	*lenp = 1;
2230        return uv;
2231    }
2232    if (!(uv & 0x40)) {
2233	*lenp = 1;
2234	rb_raise(rb_eArgError, "malformed UTF-8 character");
2235    }
2236
2237    if      (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
2238    else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
2239    else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
2240    else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
2241    else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
2242    else {
2243	*lenp = 1;
2244	rb_raise(rb_eArgError, "malformed UTF-8 character");
2245    }
2246    if (n > *lenp) {
2247	rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
2248		 n, *lenp);
2249    }
2250    *lenp = n--;
2251    if (n != 0) {
2252	while (n--) {
2253	    c = *p++ & 0xff;
2254	    if ((c & 0xc0) != 0x80) {
2255		*lenp -= n + 1;
2256		rb_raise(rb_eArgError, "malformed UTF-8 character");
2257	    }
2258	    else {
2259		c &= 0x3f;
2260		uv = uv << 6 | c;
2261	    }
2262	}
2263    }
2264    n = *lenp - 1;
2265    if (uv < utf8_limits[n]) {
2266	rb_raise(rb_eArgError, "redundant UTF-8 sequence");
2267    }
2268    return uv;
2269}
2270
/*
 * Registers the Ruby-visible methods implemented in this file:
 *
 *   Array#pack    -> pack_pack
 *   String#unpack -> pack_unpack
 *
 * Both are defined with an argument count of 1.
 */
void
Init_pack(void)
{
    rb_define_method(rb_cArray, "pack", pack_pack, 1);
    rb_define_method(rb_cString, "unpack", pack_unpack, 1);
}
2277