1/*    pp_pack.c
2 *
3 *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5 *
6 *    You may distribute under the terms of either the GNU General Public
7 *    License or the Artistic License, as specified in the README file.
8 *
9 */
10
11/*
12 * He still hopefully carried some of his gear in his pack: a small tinder-box,
13 * two small shallow pans, the smaller fitting into the larger; inside them a
14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16 * some salt.
17 *
18 *     [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
19 */
20
21/* This file contains pp ("push/pop") functions that
22 * execute the opcodes that make up a perl program. A typical pp function
23 * expects to find its arguments on the stack, and usually pushes its
24 * results onto the stack, hence the 'pp' terminology. Each OP structure
25 * contains a pointer to the relevant pp_foo() function.
26 *
27 * This particular file just contains pp_pack() and pp_unpack(). See the
28 * other pp*.c files for the rest of the pp_ functions.
29 */
30
31#include "EXTERN.h"
32#define PERL_IN_PP_PACK_C
33#include "perl.h"
34
35/* Types used by pack/unpack */
36typedef enum {
37  e_no_len,     /* no length  */
38  e_number,     /* number, [] */
39  e_star        /* asterisk   */
40} howlen_t;
41
42typedef struct tempsym {
43  const char*    patptr;   /* current template char */
44  const char*    patend;   /* one after last char   */
45  const char*    grpbeg;   /* 1st char of ()-group  */
46  const char*    grpend;   /* end of ()-group       */
47  I32      code;     /* template code (!<>)   */
48  U32      flags;    /* /=4, comma=2, pack=1  */
49                     /*   and group modifiers */
50  SSize_t  length;   /* length/repeat count   */
51  howlen_t howlen;   /* how length is given   */
52  int      level;    /* () nesting level      */
53  STRLEN   strbeg;   /* offset of group start */
54  struct tempsym *previous; /* previous group */
55} tempsym_t;
56
57#define TEMPSYM_INIT(symptr, p, e, f) \
58    STMT_START {	\
59        (symptr)->patptr   = (p);	\
60        (symptr)->patend   = (e);	\
61        (symptr)->grpbeg   = NULL;	\
62        (symptr)->grpend   = NULL;	\
63        (symptr)->grpend   = NULL;	\
64        (symptr)->code     = 0;		\
65        (symptr)->length   = 0;		\
66        (symptr)->howlen   = e_no_len;	\
67        (symptr)->level    = 0;		\
68        (symptr)->flags    = (f);	\
69        (symptr)->strbeg   = 0;		\
70        (symptr)->previous = NULL;	\
71   } STMT_END
72
73typedef union {
74    NV nv;
75    U8 bytes[sizeof(NV)];
76} NV_bytes;
77
78#if defined(HAS_LONG_DOUBLE)
79typedef union {
80    long double ld;
81    U8 bytes[sizeof(long double)];
82} ld_bytes;
83#endif
84
85#ifndef CHAR_BIT
86# define CHAR_BIT	8
87#endif
88/* Maximum number of bytes to which a byte can grow due to upgrade */
89#define UTF8_EXPAND	2
90
91/*
92 * Offset for integer pack/unpack.
93 *
94 * On architectures where I16 and I32 aren't really 16 and 32 bits,
95 * which for now are all Crays, pack and unpack have to play games.
96 */
97
98/*
99 * These values are required for portability of pack() output.
100 * If they're not right on your machine, then pack() and unpack()
101 * wouldn't work right anyway; you'll need to apply the Cray hack.
102 * (I'd like to check them with #if, but you can't use sizeof() in
103 * the preprocessor.)  --???
104 */
105/*
106    The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
107    defines are now in config.h.  --Andy Dougherty  April 1998
108 */
109#define SIZE16 2
110#define SIZE32 4
111
112/* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
113   --jhi Feb 1999 */
114
115#if U16SIZE <= SIZE16 && U32SIZE <= SIZE32
116#  define OFF16(p)     ((char *) (p))
117#  define OFF32(p)     ((char *) (p))
118#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678    /* little-endian */
119#  define OFF16(p)	((char*)(p))
120#  define OFF32(p)	((char*)(p))
121#elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321  /* big-endian */
122#  define OFF16(p)	((char*)(p) + (sizeof(U16) - SIZE16))
123#  define OFF32(p)	((char*)(p) + (sizeof(U32) - SIZE32))
124#else
125#  error "bad cray byte order"
126#endif
127
128#define PUSH16(utf8, cur, p, needs_swap)                        \
129       PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap)
130#define PUSH32(utf8, cur, p, needs_swap)                        \
131       PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap)
132
133#if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321  /* big-endian */
134#  define NEEDS_SWAP(d)     (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN)
135#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678  /* little-endian */
136#  define NEEDS_SWAP(d)     (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN)
137#else
138#  error "Unsupported byteorder"
139        /* Need to add code here to re-instate mixed endian support.
140           NEEDS_SWAP would need to hold a flag indicating which action to
141           take, and S_reverse_copy and the code in S_utf8_to_bytes would need
142           logic adding to deal with any mixed-endian transformations needed.
143        */
144#endif
145
146/* Only to be used inside a loop (see the break) */
147#define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap)	\
148STMT_START {						\
149    if (UNLIKELY(utf8)) {                               \
150        if (!S_utf8_to_bytes(aTHX_ &s, strend,		\
151          (char *) (buf), len, datumtype)) break;	\
152    } else {						\
153        if (UNLIKELY(needs_swap))                       \
154            S_reverse_copy(s, (char *) (buf), len);     \
155        else                                            \
156            Copy(s, (char *) (buf), len, char);		\
157        s += len;					\
158    }							\
159} STMT_END
160
161#define SHIFT16(utf8, s, strend, p, datumtype, needs_swap)              \
162       SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap)
163
164#define SHIFT32(utf8, s, strend, p, datumtype, needs_swap)              \
165       SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap)
166
167#define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap)          \
168       SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap)
169
170#define PUSH_VAR(utf8, aptr, var, needs_swap)           \
171       PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap)
172
173/* Avoid stack overflow due to pathological templates. 100 should be plenty. */
174#define MAX_SUB_TEMPLATE_LEVEL 100
175
176/* flags (note that type modifiers can also be used as flags!) */
177#define FLAG_WAS_UTF8	      0x40
178#define FLAG_PARSE_UTF8       0x20	/* Parse as utf8 */
179#define FLAG_UNPACK_ONLY_ONE  0x10
180#define FLAG_DO_UTF8          0x08	/* The underlying string is utf8 */
181#define FLAG_SLASH            0x04
182#define FLAG_COMMA            0x02
183#define FLAG_PACK             0x01
184
185STATIC SV *
186S_mul128(pTHX_ SV *sv, U8 m)
187{
188  STRLEN          len;
189  char           *s = SvPV(sv, len);
190  char           *t;
191
192  PERL_ARGS_ASSERT_MUL128;
193
194  if (! memBEGINs(s, len, "0000")) {  /* need to grow sv */
195    SV * const tmpNew = newSVpvs("0000000000");
196
197    sv_catsv(tmpNew, sv);
198    SvREFCNT_dec(sv);		/* free old sv */
199    sv = tmpNew;
200    s = SvPV(sv, len);
201  }
202  t = s + len - 1;
203  while (!*t)                   /* trailing '\0'? */
204    t--;
205  while (t > s) {
206    const U32 i = ((*t - '0') << 7) + m;
207    *(t--) = '0' + (char)(i % 10);
208    m = (char)(i / 10);
209  }
210  return (sv);
211}
212
213/* Explosives and implosives. */
214
215#define ISUUCHAR(ch)    inRANGE(NATIVE_TO_LATIN1(ch),               \
216                                NATIVE_TO_LATIN1(' '),              \
217                                NATIVE_TO_LATIN1('a') - 1)
218
219/* type modifiers */
220#define TYPE_IS_SHRIEKING	0x100
221#define TYPE_IS_BIG_ENDIAN	0x200
222#define TYPE_IS_LITTLE_ENDIAN	0x400
223#define TYPE_IS_PACK		0x800
224#define TYPE_ENDIANNESS_MASK	(TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
225#define TYPE_MODIFIERS(t)	((t) & ~0xFF)
226#define TYPE_NO_MODIFIERS(t)	((U8) (t))
227
228# define TYPE_ENDIANNESS(t)	((t) & TYPE_ENDIANNESS_MASK)
229# define TYPE_NO_ENDIANNESS(t)	((t) & ~TYPE_ENDIANNESS_MASK)
230
231# define ENDIANNESS_ALLOWED_TYPES   "sSiIlLqQjJfFdDpP("
232
233#define PACK_SIZE_CANNOT_CSUM		0x80
234#define PACK_SIZE_UNPREDICTABLE		0x40	/* Not a fixed size element */
235#define PACK_SIZE_MASK			0x3F
236
237#include "packsizetables.inc"
238
239static void
240S_reverse_copy(const char *src, char *dest, STRLEN len)
241{
242    dest += len;
243    while (len--)
244        *--dest = *src++;
245}
246
247STATIC U8
248utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
249{
250    STRLEN retlen;
251    UV val;
252
253    if (*s >= end) {
254        goto croak;
255    }
256    val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
257                         ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
258    if (retlen == (STRLEN) -1)
259      croak:
260        Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
261                   (int) TYPE_NO_MODIFIERS(datumtype));
262    if (val >= 0x100) {
263        Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
264                       "Character in '%c' format wrapped in unpack",
265                       (int) TYPE_NO_MODIFIERS(datumtype));
266        val = (U8) val;
267    }
268    *s += retlen;
269    return (U8)val;
270}
271
272#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
273        utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
274        *(U8 *)(s)++)
275
276STATIC bool
277S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, SSize_t buf_len, I32 datumtype)
278{
279    UV val;
280    STRLEN retlen;
281    const char *from = *s;
282    int bad = 0;
283    const U32 flags = ckWARN(WARN_UTF8) ?
284        UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
285    const bool needs_swap = NEEDS_SWAP(datumtype);
286
287    if (UNLIKELY(needs_swap))
288        buf += buf_len;
289
290    for (;buf_len > 0; buf_len--) {
291        if (from >= end) return FALSE;
292        val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
293        if (retlen == (STRLEN) -1) {
294            from += UTF8_SAFE_SKIP(from, end);
295            bad |= 1;
296        } else from += retlen;
297        if (val >= 0x100) {
298            bad |= 2;
299            val = (U8) val;
300        }
301        if (UNLIKELY(needs_swap))
302            *(U8 *)--buf = (U8)val;
303        else
304            *(U8 *)buf++ = (U8)val;
305    }
306    /* We have enough characters for the buffer. Did we have problems ? */
307    if (bad) {
308        if (bad & 1) {
309            /* Rewalk the string fragment while warning */
310            const char *ptr;
311            const U32 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
312            for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
313                if (ptr >= end) break;
314                utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
315            }
316            if (from > end) from = end;
317        }
318        if ((bad & 2))
319            Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
320                                       WARN_PACK : WARN_UNPACK),
321                           "Character(s) in '%c' format wrapped in %s",
322                           (int) TYPE_NO_MODIFIERS(datumtype),
323                           datumtype & TYPE_IS_PACK ? "pack" : "unpack");
324    }
325    *s = from;
326    return TRUE;
327}
328
329STATIC char *
330S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
331    PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
332
333    if (UNLIKELY(needs_swap)) {
334        const U8 *p = start + len;
335        while (p-- > start) {
336            append_utf8_from_native_byte(*p, (U8 **) & dest);
337        }
338    } else {
339        const U8 * const end = start + len;
340        while (start < end) {
341            append_utf8_from_native_byte(*start, (U8 **) & dest);
342            start++;
343        }
344    }
345    return dest;
346}
347
348#define PUSH_BYTES(utf8, cur, buf, len, needs_swap)             \
349STMT_START {							\
350    if (UNLIKELY(utf8))	                                        \
351        (cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap);       \
352    else {							\
353        if (UNLIKELY(needs_swap))                               \
354            S_reverse_copy((char *)(buf), cur, len);            \
355        else                                                    \
356            Copy(buf, cur, len, char);				\
357        (cur) += (len);						\
358    }								\
359} STMT_END
360
361#define SAFE_UTF8_EXPAND(var)	\
362STMT_START {				\
363    if ((var) > SSize_t_MAX / UTF8_EXPAND) \
364        Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
365    (var) = (var) * UTF8_EXPAND; \
366} STMT_END
367
368#define GROWING2(utf8, cat, start, cur, item_size, item_count)	\
369STMT_START {							\
370    if (SSize_t_MAX / (item_size) < (item_count))		\
371        Perl_croak(aTHX_ "%s", "Out of memory during pack()");	\
372    GROWING((utf8), (cat), (start), (cur), (item_size) * (item_count)); \
373} STMT_END
374
375#define GROWING(utf8, cat, start, cur, in_len)	\
376STMT_START {					\
377    STRLEN glen = (in_len);			\
378    STRLEN catcur = (STRLEN)((cur) - (start));	\
379    if (utf8) SAFE_UTF8_EXPAND(glen);		\
380    if (SSize_t_MAX - glen < catcur)		\
381        Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \
382    if (catcur + glen >= SvLEN(cat)) {	\
383        (start) = sv_exp_grow(cat, glen);	\
384        (cur) = (start) + SvCUR(cat);		\
385    }						\
386} STMT_END
387
388#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
389STMT_START {					\
390    const STRLEN glen = (in_len);		\
391    STRLEN gl = glen;				\
392    if (utf8) SAFE_UTF8_EXPAND(gl);		\
393    if ((cur) + gl >= (start) + SvLEN(cat)) {	\
394        *cur = '\0';				\
395        SvCUR_set((cat), (cur) - (start));	\
396        (start) = sv_exp_grow(cat, gl);		\
397        (cur) = (start) + SvCUR(cat);		\
398    }						\
399    PUSH_BYTES(utf8, cur, buf, glen, 0);        \
400} STMT_END
401
402#define PUSH_BYTE(utf8, s, byte)		\
403STMT_START {					\
404    if (utf8) {					\
405        const U8 au8 = (byte);			\
406        (s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
407    } else *(U8 *)(s)++ = (byte);		\
408} STMT_END
409
410/* Only to be used inside a loop (see the break) */
411#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags)		\
412STMT_START {							\
413    STRLEN retlen;						\
414    if (str >= end) break;					\
415    val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags);	\
416    if (retlen == (STRLEN) -1) {			        \
417        *cur = '\0';						\
418        Perl_croak(aTHX_ "Malformed UTF-8 string in pack");	\
419    }								\
420    str += retlen;						\
421} STMT_END
422
423static const char *_action( const tempsym_t* symptr )
424{
425    return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
426}
427
428/* Returns the sizeof() struct described by pat */
429STATIC SSize_t
430S_measure_struct(pTHX_ tempsym_t* symptr)
431{
432    SSize_t total = 0;
433
434    PERL_ARGS_ASSERT_MEASURE_STRUCT;
435
436    while (next_symbol(symptr)) {
437        SSize_t len, size;
438
439        switch (symptr->howlen) {
440          case e_star:
441            Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
442                        _action( symptr ) );
443
444          default:
445            /* e_no_len and e_number */
446            len = symptr->length;
447            break;
448        }
449
450        size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
451        if (!size) {
452            SSize_t star;
453            /* endianness doesn't influence the size of a type */
454            switch(TYPE_NO_ENDIANNESS(symptr->code)) {
455            default:
456                /* diag_listed_as: Invalid type '%s' in %s */
457                Perl_croak(aTHX_ "Invalid type '%c' in %s",
458                           (int)TYPE_NO_MODIFIERS(symptr->code),
459                           _action( symptr ) );
460            case '.' | TYPE_IS_SHRIEKING:
461            case '@' | TYPE_IS_SHRIEKING:
462            case '@':
463            case '.':
464            case '/':
465            case 'U':			/* XXXX Is it correct? */
466            case 'w':
467            case 'u':
468                Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
469                           (int) TYPE_NO_MODIFIERS(symptr->code),
470                           _action( symptr ) );
471            case '%':
472                size = 0;
473                break;
474            case '(':
475            {
476                tempsym_t savsym = *symptr;
477                symptr->patptr = savsym.grpbeg;
478                symptr->patend = savsym.grpend;
479                /* XXXX Theoretically, we need to measure many times at
480                   different positions, since the subexpression may contain
481                   alignment commands, but be not of aligned length.
482                   Need to detect this and croak().  */
483                size = measure_struct(symptr);
484                *symptr = savsym;
485                break;
486            }
487            case 'X' | TYPE_IS_SHRIEKING:
488                /* XXXX Is this useful?  Then need to treat MEASURE_BACKWARDS.
489                 */
490                if (!len)		/* Avoid division by 0 */
491                    len = 1;
492                len = total % len;	/* Assumed: the start is aligned. */
493                /* FALLTHROUGH */
494            case 'X':
495                size = -1;
496                if (total < len)
497                    Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
498                break;
499            case 'x' | TYPE_IS_SHRIEKING:
500                if (!len)		/* Avoid division by 0 */
501                    len = 1;
502                star = total % len;	/* Assumed: the start is aligned. */
503                if (star)		/* Other portable ways? */
504                    len = len - star;
505                else
506                    len = 0;
507                /* FALLTHROUGH */
508            case 'x':
509            case 'A':
510            case 'Z':
511            case 'a':
512                size = 1;
513                break;
514            case 'B':
515            case 'b':
516                len = (len + 7)/8;
517                size = 1;
518                break;
519            case 'H':
520            case 'h':
521                len = (len + 1)/2;
522                size = 1;
523                break;
524
525            case 'P':
526                len = 1;
527                size = sizeof(char*);
528                break;
529            }
530        }
531        total += len * size;
532    }
533    return total;
534}
535
536
537/* locate matching closing parenthesis or bracket
538 * returns char pointer to char after match, or NULL
539 */
540STATIC const char *
541S_group_end(pTHX_ const char *patptr, const char *patend, char ender)
542{
543    PERL_ARGS_ASSERT_GROUP_END;
544    Size_t opened = 0;  /* number of pending opened brackets */
545
546    while (patptr < patend) {
547        const char c = *patptr++;
548
549        if (opened == 0 && c == ender)
550            return patptr-1;
551        else if (c == '#') {
552            while (patptr < patend && *patptr != '\n')
553                patptr++;
554            continue;
555        } else if (c == '(' || c == '[')
556            ++opened;
557        else if (c == ')' || c == ']') {
558            if (opened == 0)
559                Perl_croak(aTHX_ "Mismatched brackets in template");
560            --opened;
561        }
562    }
563    Perl_croak(aTHX_ "No group ending character '%c' found in template",
564               ender);
565    NOT_REACHED; /* NOTREACHED */
566}
567
568
569/* Convert unsigned decimal number to binary.
570 * Expects a pointer to the first digit and address of length variable
571 * Advances char pointer to 1st non-digit char and returns number
572 */
573STATIC const char *
574S_get_num(pTHX_ const char *patptr, SSize_t *lenptr )
575{
576  SSize_t len = *patptr++ - '0';
577
578  PERL_ARGS_ASSERT_GET_NUM;
579
580  while (isDIGIT(*patptr)) {
581    SSize_t nlen = (len * 10) + (*patptr++ - '0');
582    if (nlen < 0 || nlen/10 != len)
583      Perl_croak(aTHX_ "pack/unpack repeat count overflow");
584    len = nlen;
585  }
586  *lenptr = len;
587  return patptr;
588}
589
590/* The marvellous template parsing routine: Using state stored in *symptr,
591 * locates next template code and count
592 */
593STATIC bool
594S_next_symbol(pTHX_ tempsym_t* symptr )
595{
596  const char* patptr = symptr->patptr;
597  const char* const patend = symptr->patend;
598
599  PERL_ARGS_ASSERT_NEXT_SYMBOL;
600
601  symptr->flags &= ~FLAG_SLASH;
602
603  while (patptr < patend) {
604    if (isSPACE(*patptr))
605      patptr++;
606    else if (*patptr == '#') {
607      patptr++;
608      while (patptr < patend && *patptr != '\n')
609        patptr++;
610      if (patptr < patend)
611        patptr++;
612    } else {
613      /* We should have found a template code */
614      I32 code = (U8) *patptr++;
615      U32 inherited_modifiers = 0;
616
617      /* unrecognised characters in pack/unpack formats were made fatal in
618       * 5.004, with an exception added in 5.004_04 for ',' to "just" warn: */
619      if (code == ','){
620        if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
621          symptr->flags |= FLAG_COMMA;
622          /* diag_listed_as: Invalid type '%s' in %s */
623          Perl_warner(aTHX_ packWARN(WARN_UNPACK),
624                      "Invalid type ',' in %s", _action( symptr ) );
625        }
626        continue;
627      }
628
629      /* for '(', skip to ')' */
630      if (code == '(') {
631        if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
632          Perl_croak(aTHX_ "()-group starts with a count in %s",
633                        _action( symptr ) );
634        symptr->grpbeg = patptr;
635        patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
636        if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
637          Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
638                        _action( symptr ) );
639      }
640
641      /* look for group modifiers to inherit */
642      if (TYPE_ENDIANNESS(symptr->flags)) {
643        if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
644          inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
645      }
646
647      /* look for modifiers */
648      while (patptr < patend) {
649        const char *allowed;
650        I32 modifier;
651        switch (*patptr) {
652          case '!':
653            modifier = TYPE_IS_SHRIEKING;
654            allowed = "sSiIlLxXnNvV@.";
655            break;
656          case '>':
657            modifier = TYPE_IS_BIG_ENDIAN;
658            allowed = ENDIANNESS_ALLOWED_TYPES;
659            break;
660          case '<':
661            modifier = TYPE_IS_LITTLE_ENDIAN;
662            allowed = ENDIANNESS_ALLOWED_TYPES;
663            break;
664          default:
665            allowed = "";
666            modifier = 0;
667            break;
668        }
669
670        if (modifier == 0)
671          break;
672
673        if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
674          Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
675                        allowed, _action( symptr ) );
676
677        if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
678          Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
679                     (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
680        else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
681                 TYPE_ENDIANNESS_MASK)
682          Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
683                     *patptr, _action( symptr ) );
684
685        if ((code & modifier)) {
686            Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
687                           "Duplicate modifier '%c' after '%c' in %s",
688                           *patptr, (int) TYPE_NO_MODIFIERS(code),
689                           _action( symptr ) );
690        }
691
692        code |= modifier;
693        patptr++;
694      }
695
696      /* inherit modifiers */
697      code |= inherited_modifiers;
698
699      /* look for count and/or / */
700      if (patptr < patend) {
701        if (isDIGIT(*patptr)) {
702          patptr = get_num( patptr, &symptr->length );
703          symptr->howlen = e_number;
704
705        } else if (*patptr == '*') {
706          patptr++;
707          symptr->howlen = e_star;
708
709        } else if (*patptr == '[') {
710          const char* lenptr = ++patptr;
711          symptr->howlen = e_number;
712          patptr = group_end( patptr, patend, ']' ) + 1;
713          /* what kind of [] is it? */
714          if (isDIGIT(*lenptr)) {
715            lenptr = get_num( lenptr, &symptr->length );
716            if( *lenptr != ']' )
717              Perl_croak(aTHX_ "Malformed integer in [] in %s",
718                            _action( symptr ) );
719          } else {
720            tempsym_t savsym = *symptr;
721            symptr->patend = patptr-1;
722            symptr->patptr = lenptr;
723            savsym.length = measure_struct(symptr);
724            *symptr = savsym;
725          }
726        } else {
727          symptr->howlen = e_no_len;
728          symptr->length = 1;
729        }
730
731        /* try to find / */
732        while (patptr < patend) {
733          if (isSPACE(*patptr))
734            patptr++;
735          else if (*patptr == '#') {
736            patptr++;
737            while (patptr < patend && *patptr != '\n')
738              patptr++;
739            if (patptr < patend)
740              patptr++;
741          } else {
742            if (*patptr == '/') {
743              symptr->flags |= FLAG_SLASH;
744              patptr++;
745              if (patptr < patend &&
746                  (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
747                Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
748                            _action( symptr ) );
749            }
750            break;
751          }
752        }
753      } else {
754        /* at end - no count, no / */
755        symptr->howlen = e_no_len;
756        symptr->length = 1;
757      }
758
759      symptr->code = code;
760      symptr->patptr = patptr;
761      return TRUE;
762    }
763  }
764  symptr->patptr = patptr;
765  return FALSE;
766}
767
768/*
769   There is no way to cleanly handle the case where we should process the
770   string per byte in its upgraded form while it's really in downgraded form
771   (e.g. estimates like strend-s as an upper bound for the number of
772   characters left wouldn't work). So if we foresee the need of this
773   (pattern starts with U or contains U0), we want to work on the encoded
774   version of the string. Users are advised to upgrade their pack string
775   themselves if they need to do a lot of unpacks like this on it
776*/
777STATIC bool
778need_utf8(const char *pat, const char *patend)
779{
780    bool first = TRUE;
781
782    PERL_ARGS_ASSERT_NEED_UTF8;
783
784    while (pat < patend) {
785        if (pat[0] == '#') {
786            pat++;
787            pat = (const char *) memchr(pat, '\n', patend-pat);
788            if (!pat) return FALSE;
789        } else if (pat[0] == 'U') {
790            if (first || pat[1] == '0') return TRUE;
791        } else first = FALSE;
792        pat++;
793    }
794    return FALSE;
795}
796
797STATIC char
798first_symbol(const char *pat, const char *patend) {
799    PERL_ARGS_ASSERT_FIRST_SYMBOL;
800
801    while (pat < patend) {
802        if (pat[0] != '#') return pat[0];
803        pat++;
804        pat = (const char *) memchr(pat, '\n', patend-pat);
805        if (!pat) return 0;
806        pat++;
807    }
808    return 0;
809}
810
811/*
812
813=for apidoc unpackstring
814
815The engine implementing the C<unpack()> Perl function.
816
817Using the template C<pat..patend>, this function unpacks the string
818C<s..strend> into a number of mortal SVs, which it pushes onto the perl
819argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and
820C<SPAGAIN> after the call to this function).  It returns the number of
821pushed elements.
822
823The C<strend> and C<patend> pointers should point to the byte following the
824last character of each string.
825
826Although this function returns its values on the perl argument stack, it
827doesn't take any parameters from that stack (and thus in particular
828there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for
829example).
830
831=cut */
832
833SSize_t
834Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
835{
836    tempsym_t sym;
837
838    PERL_ARGS_ASSERT_UNPACKSTRING;
839
840    if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
841    else if (need_utf8(pat, patend)) {
842        /* We probably should try to avoid this in case a scalar context call
843           wouldn't get to the "U0" */
844        STRLEN len = strend - s;
845        s = (char *) bytes_to_utf8((U8 *) s, &len);
846        SAVEFREEPV(s);
847        strend = s + len;
848        flags |= FLAG_DO_UTF8;
849    }
850
851    if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
852        flags |= FLAG_PARSE_UTF8;
853
854    TEMPSYM_INIT(&sym, pat, patend, flags);
855
856    return unpack_rec(&sym, s, s, strend, NULL );
857}
858
859STATIC SSize_t
860S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
861{
862    dSP;
863    SV *sv = NULL;
864    const SSize_t start_sp_offset = SP - PL_stack_base;
865    howlen_t howlen;
866    SSize_t checksum = 0;
867    UV cuv = 0;
868    NV cdouble = 0.0;
869    const SSize_t bits_in_uv = CHAR_BIT * sizeof(cuv);
870    bool beyond = FALSE;
871    bool explicit_length;
872    const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
873    bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
874
875    PERL_ARGS_ASSERT_UNPACK_REC;
876
877    symptr->strbeg = s - strbeg;
878
879    while (next_symbol(symptr)) {
880        packprops_t props;
881        SSize_t len;
882        I32 datumtype = symptr->code;
883        bool needs_swap;
884        /* do first one only unless in list context
885           / is implemented by unpacking the count, then popping it from the
886           stack, so must check that we're not in the middle of a /  */
887        if ( unpack_only_one
888             && (SP - PL_stack_base == start_sp_offset + 1)
889             && (datumtype != '/') )   /* XXX can this be omitted */
890            break;
891
892        switch (howlen = symptr->howlen) {
893          case e_star:
894            len = strend - strbeg;	/* long enough */
895            break;
896          default:
897            /* e_no_len and e_number */
898            len = symptr->length;
899            break;
900        }
901
902        explicit_length = TRUE;
903      redo_switch:
904        beyond = s >= strend;
905
906        props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
907        if (props) {
908            /* props nonzero means we can process this letter. */
909            const SSize_t size = props & PACK_SIZE_MASK;
910            const SSize_t howmany = (strend - s) / size;
911            if (len > howmany)
912                len = howmany;
913
914            if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
915                if (len && unpack_only_one) len = 1;
916                EXTEND(SP, len);
917                EXTEND_MORTAL(len);
918            }
919        }
920
921        needs_swap = NEEDS_SWAP(datumtype);
922
923        switch(TYPE_NO_ENDIANNESS(datumtype)) {
924        default:
925            /* diag_listed_as: Invalid type '%s' in %s */
926            Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
927
928        case '%':
929            if (howlen == e_no_len)
930                len = 16;		/* len is not specified */
931            checksum = len;
932            cuv = 0;
933            cdouble = 0;
934            continue;
935
936        case '(':
937        {
938            tempsym_t savsym = *symptr;
939            const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
940            symptr->flags |= group_modifiers;
941            symptr->patend = savsym.grpend;
942            /* cppcheck-suppress autoVariables */
943            symptr->previous = &savsym;
944            symptr->level++;
945            PUTBACK;
946            if (len && unpack_only_one) len = 1;
947            while (len--) {
948                symptr->patptr = savsym.grpbeg;
949                if (utf8) symptr->flags |=  FLAG_PARSE_UTF8;
950                else      symptr->flags &= ~FLAG_PARSE_UTF8;
951                unpack_rec(symptr, s, strbeg, strend, &s);
952                if (s == strend && savsym.howlen == e_star)
953                    break; /* No way to continue */
954            }
955            SPAGAIN;
956            savsym.flags = symptr->flags & ~group_modifiers;
957            *symptr = savsym;
958            break;
959        }
960        case '.' | TYPE_IS_SHRIEKING:
961        case '.': {
962            const char *from;
963            SV *sv;
964            const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
965            if (howlen == e_star) from = strbeg;
966            else if (len <= 0) from = s;
967            else {
968                tempsym_t *group = symptr;
969
970                while (--len && group) group = group->previous;
971                from = group ? strbeg + group->strbeg : strbeg;
972            }
973            sv = from <= s ?
974                newSVuv(  u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
975                newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
976            mXPUSHs(sv);
977            break;
978        }
979        case '@' | TYPE_IS_SHRIEKING:
980        case '@':
981            s = strbeg + symptr->strbeg;
982            if (utf8  && !(datumtype & TYPE_IS_SHRIEKING))
983            {
984                while (len > 0) {
985                    if (s >= strend)
986                        Perl_croak(aTHX_ "'@' outside of string in unpack");
987                    s += UTF8SKIP(s);
988                    len--;
989                }
990                if (s > strend)
991                    Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
992            } else {
993                if (strend-s < len)
994                    Perl_croak(aTHX_ "'@' outside of string in unpack");
995                s += len;
996            }
997            break;
998        case 'X' | TYPE_IS_SHRIEKING:
999            if (!len)			/* Avoid division by 0 */
1000                len = 1;
1001            if (utf8) {
1002                const char *hop, *last;
1003                SSize_t l = len;
1004                hop = last = strbeg;
1005                while (hop < s) {
1006                    hop += UTF8SKIP(hop);
1007                    if (--l == 0) {
1008                        last = hop;
1009                        l = len;
1010                    }
1011                }
1012                if (last > s)
1013                    Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1014                s = last;
1015                break;
1016            }
1017            len = (s - strbeg) % len;
1018            /* FALLTHROUGH */
1019        case 'X':
1020            if (utf8) {
1021                while (len > 0) {
1022                    if (s <= strbeg)
1023                        Perl_croak(aTHX_ "'X' outside of string in unpack");
1024                    while (--s, UTF8_IS_CONTINUATION(*s)) {
1025                        if (s <= strbeg)
1026                            Perl_croak(aTHX_ "'X' outside of string in unpack");
1027                    }
1028                    len--;
1029                }
1030            } else {
1031                if (len > s - strbeg)
1032                    Perl_croak(aTHX_ "'X' outside of string in unpack" );
1033                s -= len;
1034            }
1035            break;
1036        case 'x' | TYPE_IS_SHRIEKING: {
1037            SSize_t ai32;
1038            if (!len)			/* Avoid division by 0 */
1039                len = 1;
1040            if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1041            else      ai32 = (s - strbeg)                         % len;
1042            if (ai32 == 0) break;
1043            len -= ai32;
1044            }
1045            /* FALLTHROUGH */
1046        case 'x':
1047            if (utf8) {
1048                while (len>0) {
1049                    if (s >= strend)
1050                        Perl_croak(aTHX_ "'x' outside of string in unpack");
1051                    s += UTF8SKIP(s);
1052                    len--;
1053                }
1054            } else {
1055                if (len > strend - s)
1056                    Perl_croak(aTHX_ "'x' outside of string in unpack");
1057                s += len;
1058            }
1059            break;
1060        case '/':
1061            Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1062
1063        case 'A':
1064        case 'Z':
1065        case 'a':
1066            if (checksum) {
1067                /* Preliminary length estimate is assumed done in 'W' */
1068                if (len > strend - s) len = strend - s;
1069                goto W_checksum;
1070            }
1071            if (utf8) {
1072                SSize_t l;
1073                const char *hop;
1074                for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1075                    if (hop >= strend) {
1076                        if (hop > strend)
1077                            Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1078                        break;
1079                    }
1080                }
1081                if (hop > strend)
1082                    Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1083                len = hop - s;
1084            } else if (len > strend - s)
1085                len = strend - s;
1086
1087            if (datumtype == 'Z') {
1088                /* 'Z' strips stuff after first null */
1089                const char *ptr, *end;
1090                end = s + len;
1091                for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1092                sv = newSVpvn(s, ptr-s);
1093                if (howlen == e_star) /* exact for 'Z*' */
1094                    len = ptr-s + (ptr != strend ? 1 : 0);
1095            } else if (datumtype == 'A') {
1096                /* 'A' strips both nulls and spaces */
1097                const char *ptr;
1098                if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1099                    for (ptr = s+len-1; ptr >= s; ptr--) {
1100                        if (   *ptr != 0
1101                            && !UTF8_IS_CONTINUATION(*ptr)
1102                            && !isSPACE_utf8_safe(ptr, strend))
1103                        {
1104                            break;
1105                        }
1106                    }
1107                    if (ptr >= s) ptr += UTF8SKIP(ptr);
1108                    else ptr++;
1109                    if (ptr > s+len)
1110                        Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1111                } else {
1112                    for (ptr = s+len-1; ptr >= s; ptr--)
1113                        if (*ptr != 0 && !isSPACE(*ptr)) break;
1114                    ptr++;
1115                }
1116                sv = newSVpvn(s, ptr-s);
1117            } else sv = newSVpvn(s, len);
1118
1119            if (utf8) {
1120                SvUTF8_on(sv);
1121                /* Undo any upgrade done due to need_utf8() */
1122                if (!(symptr->flags & FLAG_WAS_UTF8))
1123                    sv_utf8_downgrade(sv, 0);
1124            }
1125            mXPUSHs(sv);
1126            s += len;
1127            break;
1128        case 'B':
1129        case 'b': {
1130            char *str;
1131            if (howlen == e_star || len > (strend - s) * 8)
1132                len = (strend - s) * 8;
1133            if (checksum) {
1134                if (utf8)
1135                    while (len >= 8 && s < strend) {
1136                        cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
1137                        len -= 8;
1138                    }
1139                else
1140                    while (len >= 8) {
1141                        cuv += PL_bitcount[*(U8 *)s++];
1142                        len -= 8;
1143                    }
1144                if (len && s < strend) {
1145                    U8 bits;
1146                    bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1147                    if (datumtype == 'b')
1148                        while (len-- > 0) {
1149                            if (bits & 1) cuv++;
1150                            bits >>= 1;
1151                        }
1152                    else
1153                        while (len-- > 0) {
1154                            if (bits & 0x80) cuv++;
1155                            bits <<= 1;
1156                        }
1157                }
1158                break;
1159            }
1160
1161            sv = sv_2mortal(newSV(len ? len : 1));
1162            SvPOK_on(sv);
1163            str = SvPVX(sv);
1164            if (datumtype == 'b') {
1165                U8 bits = 0;
1166                const SSize_t ai32 = len;
1167                for (len = 0; len < ai32; len++) {
1168                    if (len & 7) bits >>= 1;
1169                    else if (utf8) {
1170                        if (s >= strend) break;
1171                        bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1172                    } else bits = *(U8 *) s++;
1173                    *str++ = bits & 1 ? '1' : '0';
1174                }
1175            } else {
1176                U8 bits = 0;
1177                const SSize_t ai32 = len;
1178                for (len = 0; len < ai32; len++) {
1179                    if (len & 7) bits <<= 1;
1180                    else if (utf8) {
1181                        if (s >= strend) break;
1182                        bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1183                    } else bits = *(U8 *) s++;
1184                    *str++ = bits & 0x80 ? '1' : '0';
1185                }
1186            }
1187            *str = '\0';
1188            SvCUR_set(sv, str - SvPVX_const(sv));
1189            XPUSHs(sv);
1190            break;
1191        }
1192        case 'H':
1193        case 'h': {
1194            char *str = NULL;
1195            /* Preliminary length estimate, acceptable for utf8 too */
1196            if (howlen == e_star || len > (strend - s) * 2)
1197                len = (strend - s) * 2;
1198            if (!checksum) {
1199                sv = sv_2mortal(newSV(len ? len : 1));
1200                SvPOK_on(sv);
1201                str = SvPVX(sv);
1202            }
1203            if (datumtype == 'h') {
1204                U8 bits = 0;
1205                SSize_t ai32 = len;
1206                for (len = 0; len < ai32; len++) {
1207                    if (len & 1) bits >>= 4;
1208                    else if (utf8) {
1209                        if (s >= strend) break;
1210                        bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1211                    } else bits = * (U8 *) s++;
1212                    if (!checksum)
1213                        *str++ = PL_hexdigit[bits & 15];
1214                }
1215            } else {
1216                U8 bits = 0;
1217                const SSize_t ai32 = len;
1218                for (len = 0; len < ai32; len++) {
1219                    if (len & 1) bits <<= 4;
1220                    else if (utf8) {
1221                        if (s >= strend) break;
1222                        bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
1223                    } else bits = *(U8 *) s++;
1224                    if (!checksum)
1225                        *str++ = PL_hexdigit[(bits >> 4) & 15];
1226                }
1227            }
1228            if (!checksum) {
1229                *str = '\0';
1230                SvCUR_set(sv, str - SvPVX_const(sv));
1231                XPUSHs(sv);
1232            }
1233            break;
1234        }
1235        case 'C':
1236            if (len == 0) {
1237                if (explicit_length)
1238                    /* Switch to "character" mode */
1239                    utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1240                break;
1241            }
1242            /* FALLTHROUGH */
1243        case 'c':
1244            while (len-- > 0 && s < strend) {
1245                int aint;
1246                if (utf8)
1247                  {
1248                    STRLEN retlen;
1249                    aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1250                                 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1251                    if (retlen == (STRLEN) -1)
1252                        Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1253                    s += retlen;
1254                  }
1255                else
1256                  aint = *(U8 *)(s)++;
1257                if (aint >= 128 && datumtype != 'C')	/* fake up signed chars */
1258                    aint -= 256;
1259                if (!checksum)
1260                    mPUSHi(aint);
1261                else if (checksum > bits_in_uv)
1262                    cdouble += (NV)aint;
1263                else
1264                    cuv += aint;
1265            }
1266            break;
1267        case 'W':
1268          W_checksum:
1269            if (utf8) {
1270                while (len-- > 0 && s < strend) {
1271                    STRLEN retlen;
1272                    const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1273                                         ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1274                    if (retlen == (STRLEN) -1)
1275                        Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1276                    s += retlen;
1277                    if (!checksum)
1278                        mPUSHu(val);
1279                    else if (checksum > bits_in_uv)
1280                        cdouble += (NV) val;
1281                    else
1282                        cuv += val;
1283                }
1284            } else if (!checksum)
1285                while (len-- > 0) {
1286                    const U8 ch = *(U8 *) s++;
1287                    mPUSHu(ch);
1288            }
1289            else if (checksum > bits_in_uv)
1290                while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1291            else
1292                while (len-- > 0) cuv += *(U8 *) s++;
1293            break;
1294        case 'U':
1295            if (len == 0) {
1296                if (explicit_length && howlen != e_star) {
1297                    /* Switch to "bytes in UTF-8" mode */
1298                    if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1299                    else
1300                        /* Should be impossible due to the need_utf8() test */
1301                        Perl_croak(aTHX_ "U0 mode on a byte string");
1302                }
1303                break;
1304            }
1305            if (len > strend - s) len = strend - s;
1306            if (!checksum) {
1307                if (len && unpack_only_one) len = 1;
1308                EXTEND(SP, len);
1309                EXTEND_MORTAL(len);
1310            }
1311            while (len-- > 0 && s < strend) {
1312                STRLEN retlen;
1313                UV auv;
1314                if (utf8) {
1315                    U8 result[UTF8_MAXLEN+1];
1316                    const char *ptr = s;
1317                    STRLEN len;
1318                    /* Bug: warns about bad utf8 even if we are short on bytes
1319                       and will break out of the loop */
1320                    if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1321                                      'U'))
1322                        break;
1323                    len = UTF8SKIP(result);
1324                    if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
1325                                      (char *) &result[1], len-1, 'U')) break;
1326                    auv = utf8n_to_uvchr(result, len, &retlen,
1327                                         UTF8_ALLOW_DEFAULT);
1328                    s = ptr;
1329                } else {
1330                    auv = utf8n_to_uvchr((U8*)s, strend - s, &retlen,
1331                                         UTF8_ALLOW_DEFAULT);
1332                    if (retlen == (STRLEN) -1)
1333                        Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1334                    s += retlen;
1335                }
1336                if (!checksum)
1337                    mPUSHu(auv);
1338                else if (checksum > bits_in_uv)
1339                    cdouble += (NV) auv;
1340                else
1341                    cuv += auv;
1342            }
1343            break;
1344        case 's' | TYPE_IS_SHRIEKING:
1345#if SHORTSIZE != SIZE16
1346            while (len-- > 0) {
1347                short ashort;
1348                SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap);
1349                if (!checksum)
1350                    mPUSHi(ashort);
1351                else if (checksum > bits_in_uv)
1352                    cdouble += (NV)ashort;
1353                else
1354                    cuv += ashort;
1355            }
1356            break;
1357#else
1358            /* FALLTHROUGH */
1359#endif
1360        case 's':
1361            while (len-- > 0) {
1362                I16 ai16;
1363
1364#if U16SIZE > SIZE16
1365                ai16 = 0;
1366#endif
1367                SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1368#if U16SIZE > SIZE16
1369                if (ai16 > 32767)
1370                    ai16 -= 65536;
1371#endif
1372                if (!checksum)
1373                    mPUSHi(ai16);
1374                else if (checksum > bits_in_uv)
1375                    cdouble += (NV)ai16;
1376                else
1377                    cuv += ai16;
1378            }
1379            break;
1380        case 'S' | TYPE_IS_SHRIEKING:
1381#if SHORTSIZE != SIZE16
1382            while (len-- > 0) {
1383                unsigned short aushort;
1384                SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap);
1385                if (!checksum)
1386                    mPUSHu(aushort);
1387                else if (checksum > bits_in_uv)
1388                    cdouble += (NV)aushort;
1389                else
1390                    cuv += aushort;
1391            }
1392            break;
1393#else
1394            /* FALLTHROUGH */
1395#endif
1396        case 'v':
1397        case 'n':
1398        case 'S':
1399            while (len-- > 0) {
1400                U16 au16;
1401#if U16SIZE > SIZE16
1402                au16 = 0;
1403#endif
1404                SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap);
1405                if (datumtype == 'n')
1406                    au16 = PerlSock_ntohs(au16);
1407                if (datumtype == 'v')
1408                    au16 = vtohs(au16);
1409                if (!checksum)
1410                    mPUSHu(au16);
1411                else if (checksum > bits_in_uv)
1412                    cdouble += (NV) au16;
1413                else
1414                    cuv += au16;
1415            }
1416            break;
1417        case 'v' | TYPE_IS_SHRIEKING:
1418        case 'n' | TYPE_IS_SHRIEKING:
1419            while (len-- > 0) {
1420                I16 ai16;
1421# if U16SIZE > SIZE16
1422                ai16 = 0;
1423# endif
1424                SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap);
1425                /* There should never be any byte-swapping here.  */
1426                assert(!TYPE_ENDIANNESS(datumtype));
1427                if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1428                    ai16 = (I16) PerlSock_ntohs((U16) ai16);
1429                if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1430                    ai16 = (I16) vtohs((U16) ai16);
1431                if (!checksum)
1432                    mPUSHi(ai16);
1433                else if (checksum > bits_in_uv)
1434                    cdouble += (NV) ai16;
1435                else
1436                    cuv += ai16;
1437            }
1438            break;
1439        case 'i':
1440        case 'i' | TYPE_IS_SHRIEKING:
1441            while (len-- > 0) {
1442                int aint;
1443                SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap);
1444                if (!checksum)
1445                    mPUSHi(aint);
1446                else if (checksum > bits_in_uv)
1447                    cdouble += (NV)aint;
1448                else
1449                    cuv += aint;
1450            }
1451            break;
1452        case 'I':
1453        case 'I' | TYPE_IS_SHRIEKING:
1454            while (len-- > 0) {
1455                unsigned int auint;
1456                SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap);
1457                if (!checksum)
1458                    mPUSHu(auint);
1459                else if (checksum > bits_in_uv)
1460                    cdouble += (NV)auint;
1461                else
1462                    cuv += auint;
1463            }
1464            break;
1465        case 'j':
1466            while (len-- > 0) {
1467                IV aiv;
1468                SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap);
1469                if (!checksum)
1470                    mPUSHi(aiv);
1471                else if (checksum > bits_in_uv)
1472                    cdouble += (NV)aiv;
1473                else
1474                    cuv += aiv;
1475            }
1476            break;
1477        case 'J':
1478            while (len-- > 0) {
1479                UV auv;
1480                SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap);
1481                if (!checksum)
1482                    mPUSHu(auv);
1483                else if (checksum > bits_in_uv)
1484                    cdouble += (NV)auv;
1485                else
1486                    cuv += auv;
1487            }
1488            break;
1489        case 'l' | TYPE_IS_SHRIEKING:
1490#if LONGSIZE != SIZE32
1491            while (len-- > 0) {
1492                long along;
1493                SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap);
1494                if (!checksum)
1495                    mPUSHi(along);
1496                else if (checksum > bits_in_uv)
1497                    cdouble += (NV)along;
1498                else
1499                    cuv += along;
1500            }
1501            break;
1502#else
1503            /* FALLTHROUGH */
1504#endif
1505        case 'l':
1506            while (len-- > 0) {
1507                I32 ai32;
1508#if U32SIZE > SIZE32
1509                ai32 = 0;
1510#endif
1511                SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1512#if U32SIZE > SIZE32
1513                if (ai32 > 2147483647) ai32 -= 4294967296;
1514#endif
1515                if (!checksum)
1516                    mPUSHi(ai32);
1517                else if (checksum > bits_in_uv)
1518                    cdouble += (NV)ai32;
1519                else
1520                    cuv += ai32;
1521            }
1522            break;
1523        case 'L' | TYPE_IS_SHRIEKING:
1524#if LONGSIZE != SIZE32
1525            while (len-- > 0) {
1526                unsigned long aulong;
1527                SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap);
1528                if (!checksum)
1529                    mPUSHu(aulong);
1530                else if (checksum > bits_in_uv)
1531                    cdouble += (NV)aulong;
1532                else
1533                    cuv += aulong;
1534            }
1535            break;
1536#else
1537            /* FALLTHROUGH */
1538#endif
1539        case 'V':
1540        case 'N':
1541        case 'L':
1542            while (len-- > 0) {
1543                U32 au32;
1544#if U32SIZE > SIZE32
1545                au32 = 0;
1546#endif
1547                SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap);
1548                if (datumtype == 'N')
1549                    au32 = PerlSock_ntohl(au32);
1550                if (datumtype == 'V')
1551                    au32 = vtohl(au32);
1552                if (!checksum)
1553                    mPUSHu(au32);
1554                else if (checksum > bits_in_uv)
1555                    cdouble += (NV)au32;
1556                else
1557                    cuv += au32;
1558            }
1559            break;
1560        case 'V' | TYPE_IS_SHRIEKING:
1561        case 'N' | TYPE_IS_SHRIEKING:
1562            while (len-- > 0) {
1563                I32 ai32;
1564#if U32SIZE > SIZE32
1565                ai32 = 0;
1566#endif
1567                SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap);
1568                /* There should never be any byte swapping here.  */
1569                assert(!TYPE_ENDIANNESS(datumtype));
1570                if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1571                    ai32 = (I32)PerlSock_ntohl((U32)ai32);
1572                if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1573                    ai32 = (I32)vtohl((U32)ai32);
1574                if (!checksum)
1575                    mPUSHi(ai32);
1576                else if (checksum > bits_in_uv)
1577                    cdouble += (NV)ai32;
1578                else
1579                    cuv += ai32;
1580            }
1581            break;
1582        case 'p':
1583            while (len-- > 0) {
1584                const char *aptr;
1585                SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1586                /* newSVpv generates undef if aptr is NULL */
1587                mPUSHs(newSVpv(aptr, 0));
1588            }
1589            break;
1590        case 'w':
1591            {
1592                UV auv = 0;
1593                size_t bytes = 0;
1594
1595                while (len > 0 && s < strend) {
1596                    U8 ch;
1597                    ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1598                    auv = (auv << 7) | (ch & 0x7f);
1599                    /* UTF8_IS_XXXXX not right here because this is a BER, not
1600                     * UTF-8 format - using constant 0x80 */
1601                    if (ch < 0x80) {
1602                        bytes = 0;
1603                        mPUSHu(auv);
1604                        len--;
1605                        auv = 0;
1606                        continue;
1607                    }
1608                    if (++bytes >= sizeof(UV)) {	/* promote to string */
1609                        const char *t;
1610
1611                        sv = Perl_newSVpvf(aTHX_ "%.*" UVuf,
1612                                                 (int)TYPE_DIGITS(UV), auv);
1613                        while (s < strend) {
1614                            ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1615                            sv = mul128(sv, (U8)(ch & 0x7f));
1616                            if (!(ch & 0x80)) {
1617                                bytes = 0;
1618                                break;
1619                            }
1620                        }
1621                        t = SvPV_nolen_const(sv);
1622                        while (*t == '0')
1623                            t++;
1624                        sv_chop(sv, t);
1625                        mPUSHs(sv);
1626                        len--;
1627                        auv = 0;
1628                    }
1629                }
1630                if ((s >= strend) && bytes)
1631                    Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
1632            }
1633            break;
1634        case 'P':
1635            if (symptr->howlen == e_star)
1636                Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
1637            EXTEND(SP, 1);
1638            if (s + sizeof(char*) <= strend) {
1639                char *aptr;
1640                SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap);
1641                /* newSVpvn generates undef if aptr is NULL */
1642                PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
1643            }
1644            break;
1645#if defined(HAS_QUAD) && IVSIZE >= 8
1646        case 'q':
1647            while (len-- > 0) {
1648                Quad_t aquad;
1649                SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
1650                if (!checksum)
1651                    mPUSHs(newSViv((IV)aquad));
1652                else if (checksum > bits_in_uv)
1653                    cdouble += (NV)aquad;
1654                else
1655                    cuv += aquad;
1656            }
1657            break;
1658        case 'Q':
1659            while (len-- > 0) {
1660                Uquad_t auquad;
1661                SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
1662                if (!checksum)
1663                    mPUSHs(newSVuv((UV)auquad));
1664                else if (checksum > bits_in_uv)
1665                    cdouble += (NV)auquad;
1666                else
1667                    cuv += auquad;
1668            }
1669            break;
1670#endif
1671        /* float and double added gnb@melba.bby.oz.au 22/11/89 */
1672        case 'f':
1673            while (len-- > 0) {
1674                float afloat;
1675                SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap);
1676                if (!checksum)
1677                    mPUSHn(afloat);
1678                else
1679                    cdouble += afloat;
1680            }
1681            break;
1682        case 'd':
1683            while (len-- > 0) {
1684                double adouble;
1685                SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap);
1686                if (!checksum)
1687                    mPUSHn(adouble);
1688                else
1689                    cdouble += adouble;
1690            }
1691            break;
1692        case 'F':
1693            while (len-- > 0) {
1694                NV_bytes anv;
1695                SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes),
1696                            datumtype, needs_swap);
1697                if (!checksum)
1698                    mPUSHn(anv.nv);
1699                else
1700                    cdouble += anv.nv;
1701            }
1702            break;
1703#if defined(HAS_LONG_DOUBLE)
1704        case 'D':
1705            while (len-- > 0) {
1706                ld_bytes aldouble;
1707                SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
1708                            sizeof(aldouble.bytes), datumtype, needs_swap);
1709                /* The most common long double format, the x86 80-bit
1710                 * extended precision, has either 2 or 6 unused bytes,
1711                 * which may contain garbage, which may contain
1712                 * unintentional data.  While we do zero the bytes of
1713                 * the long double data in pack(), here in unpack() we
1714                 * don't, because it's really hard to envision that
1715                 * reading the long double off aldouble would be
1716                 * affected by the unused bytes.
1717                 *
1718                 * Note that trying to unpack 'long doubles' of 'long
1719                 * doubles' packed in another system is in the general
1720                 * case doomed without having more detail. */
1721                if (!checksum)
1722                    mPUSHn(aldouble.ld);
1723                else
1724                    cdouble += aldouble.ld;
1725            }
1726            break;
1727#endif
1728        case 'u':
1729            if (!checksum) {
1730                const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
1731                sv = sv_2mortal(newSV(l));
1732                if (l) {
1733                    SvPOK_on(sv);
1734                    *SvEND(sv) = '\0';
1735                }
1736            }
1737
1738            /* Note that all legal uuencoded strings are ASCII printables, so
1739             * have the same representation under UTF-8 vs not.  This means we
1740             * can ignore UTF8ness on legal input.  For illegal we stop at the
1741             * first failure, and don't report where/what that is, so again we
1742             * can ignore UTF8ness */
1743
1744            while (s < strend && *s != ' ' && ISUUCHAR(*s)) {
1745                I32 a, b, c, d;
1746                char hunk[3];
1747
1748                len = PL_uudmap[*(U8*)s++] & 077;
1749                while (len > 0) {
1750                    if (s < strend && ISUUCHAR(*s))
1751                        a = PL_uudmap[*(U8*)s++] & 077;
1752                    else
1753                        a = 0;
1754                    if (s < strend && ISUUCHAR(*s))
1755                        b = PL_uudmap[*(U8*)s++] & 077;
1756                    else
1757                        b = 0;
1758                    if (s < strend && ISUUCHAR(*s))
1759                        c = PL_uudmap[*(U8*)s++] & 077;
1760                    else
1761                        c = 0;
1762                    if (s < strend && ISUUCHAR(*s))
1763                        d = PL_uudmap[*(U8*)s++] & 077;
1764                    else
1765                        d = 0;
1766                    hunk[0] = (char)((a << 2) | (b >> 4));
1767                    hunk[1] = (char)((b << 4) | (c >> 2));
1768                    hunk[2] = (char)((c << 6) | d);
1769                    if (!checksum)
1770                        sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
1771                    len -= 3;
1772                }
1773                if (*s == '\n')
1774                    s++;
1775                else	/* possible checksum byte */
1776                    if (s + 1 < strend && s[1] == '\n')
1777                        s += 2;
1778            }
1779            if (!checksum)
1780                XPUSHs(sv);
1781            break;
1782        } /* End of switch */
1783
1784        if (checksum) {
1785            if (memCHRs("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
1786              (checksum > bits_in_uv &&
1787               memCHRs("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
1788                NV trouble, anv;
1789
1790                anv = (NV) (1 << (checksum & 15));
1791                while (checksum >= 16) {
1792                    checksum -= 16;
1793                    anv *= 65536.0;
1794                }
1795                while (cdouble < 0.0)
1796                    cdouble += anv;
1797                cdouble = Perl_modf(cdouble / anv, &trouble);
1798#ifdef LONGDOUBLE_DOUBLEDOUBLE
1799                /* Workaround for powerpc doubledouble modfl bug:
1800                 * close to 1.0L and -1.0L cdouble is 0, and trouble
1801                 * is cdouble / anv. */
1802                if (trouble != Perl_ceil(trouble)) {
1803                  cdouble = trouble;
1804                  if (cdouble >  1.0L) cdouble -= 1.0L;
1805                  if (cdouble < -1.0L) cdouble += 1.0L;
1806                }
1807#endif
1808                cdouble *= anv;
1809                sv = newSVnv(cdouble);
1810            }
1811            else {
1812                if (checksum < bits_in_uv) {
1813                    UV mask = nBIT_MASK(checksum);
1814                    cuv &= mask;
1815                }
1816                sv = newSVuv(cuv);
1817            }
1818            mXPUSHs(sv);
1819            checksum = 0;
1820        }
1821
1822        if (symptr->flags & FLAG_SLASH){
1823            if (SP - PL_stack_base - start_sp_offset <= 0)
1824                break;
1825            if( next_symbol(symptr) ){
1826              if( symptr->howlen == e_number )
1827                Perl_croak(aTHX_ "Count after length/code in unpack" );
1828              if( beyond ){
1829                /* ...end of char buffer then no decent length available */
1830                Perl_croak(aTHX_ "length/code after end of string in unpack" );
1831              } else {
1832                /* take top of stack (hope it's numeric) */
1833                len = POPi;
1834                if( len < 0 )
1835                    Perl_croak(aTHX_ "Negative '/' count in unpack" );
1836              }
1837            } else {
1838                Perl_croak(aTHX_ "Code missing after '/' in unpack" );
1839            }
1840            datumtype = symptr->code;
1841            explicit_length = FALSE;
1842            goto redo_switch;
1843        }
1844    }
1845
1846    if (new_s)
1847        *new_s = s;
1848    PUTBACK;
1849    return SP - PL_stack_base - start_sp_offset;
1850}
1851
1852PP(pp_unpack)
1853{
1854    dSP;
1855    dPOPPOPssrl;
1856    U8 gimme = GIMME_V;
1857    STRLEN llen;
1858    STRLEN rlen;
1859    const char *pat = SvPV_const(left,  llen);
1860    const char *s   = SvPV_const(right, rlen);
1861    const char *strend = s + rlen;
1862    const char *patend = pat + llen;
1863    SSize_t cnt;
1864
1865    PUTBACK;
1866    cnt = unpackstring(pat, patend, s, strend,
1867                     ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
1868                     | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
1869
1870    SPAGAIN;
1871    if ( !cnt && gimme == G_SCALAR )
1872       PUSHs(&PL_sv_undef);
1873    RETURN;
1874}
1875
1876STATIC U8 *
1877doencodes(U8 *h, const U8 *s, SSize_t len)
1878{
1879    *h++ = PL_uuemap[len];
1880    while (len > 2) {
1881        *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1882        *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
1883        *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
1884        *h++ = PL_uuemap[(077 & (s[2] & 077))];
1885        s += 3;
1886        len -= 3;
1887    }
1888    if (len > 0) {
1889        const U8 r = (len > 1 ? s[1] : '\0');
1890        *h++ = PL_uuemap[(077 & (s[0] >> 2))];
1891        *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
1892        *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
1893        *h++ = PL_uuemap[0];
1894    }
1895    *h++ = '\n';
1896    return h;
1897}
1898
1899STATIC SV *
1900S_is_an_int(pTHX_ const char *s, STRLEN l)
1901{
1902  SV *result = newSVpvn(s, l);
1903  char *const result_c = SvPV_nolen(result);	/* convenience */
1904  char *out = result_c;
1905  bool skip = 1;
1906  bool ignore = 0;
1907
1908  PERL_ARGS_ASSERT_IS_AN_INT;
1909
1910  while (*s) {
1911    switch (*s) {
1912    case ' ':
1913      break;
1914    case '+':
1915      if (!skip) {
1916        SvREFCNT_dec(result);
1917        return (NULL);
1918      }
1919      break;
1920    case '0':
1921    case '1':
1922    case '2':
1923    case '3':
1924    case '4':
1925    case '5':
1926    case '6':
1927    case '7':
1928    case '8':
1929    case '9':
1930      skip = 0;
1931      if (!ignore) {
1932        *(out++) = *s;
1933      }
1934      break;
1935    case '.':
1936      ignore = 1;
1937      break;
1938    default:
1939      SvREFCNT_dec(result);
1940      return (NULL);
1941    }
1942    s++;
1943  }
1944  *(out++) = '\0';
1945  SvCUR_set(result, out - result_c);
1946  return (result);
1947}
1948
1949/* pnum must be '\0' terminated */
1950STATIC int
1951S_div128(pTHX_ SV *pnum, bool *done)
1952{
1953    STRLEN len;
1954    char * const s = SvPV(pnum, len);
1955    char *t = s;
1956    int m = 0;
1957
1958    PERL_ARGS_ASSERT_DIV128;
1959
1960    *done = 1;
1961    while (*t) {
1962        const int i = m * 10 + (*t - '0');
1963        const int r = (i >> 7); /* r < 10 */
1964        m = i & 0x7F;
1965        if (r) {
1966            *done = 0;
1967        }
1968        *(t++) = '0' + r;
1969    }
1970    *(t++) = '\0';
1971    SvCUR_set(pnum, (STRLEN) (t - s));
1972    return (m);
1973}
1974
1975/*
1976=for apidoc packlist
1977
1978The engine implementing C<pack()> Perl function.
1979
1980=cut
1981*/
1982
1983void
1984Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
1985{
1986    tempsym_t sym;
1987
1988    PERL_ARGS_ASSERT_PACKLIST;
1989
1990    TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
1991
1992    /* We're going to do changes through SvPVX(cat). Make sure it's valid.
1993       Also make sure any UTF8 flag is loaded */
1994    SvPV_force_nolen(cat);
1995    if (DO_UTF8(cat))
1996        sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
1997
1998    (void)pack_rec( cat, &sym, beglist, endlist );
1999}
2000
2001/* like sv_utf8_upgrade, but also repoint the group start markers */
2002STATIC void
2003marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2004    STRLEN len;
2005    tempsym_t *group;
2006    const char *from_ptr, *from_start, *from_end, **marks, **m;
2007    char *to_start, *to_ptr;
2008
2009    if (SvUTF8(sv)) return;
2010
2011    from_start = SvPVX_const(sv);
2012    from_end = from_start + SvCUR(sv);
2013    for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2014        if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
2015    if (from_ptr == from_end) {
2016        /* Simple case: no character needs to be changed */
2017        SvUTF8_on(sv);
2018        return;
2019    }
2020
2021    len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2022    Newx(to_start, len, char);
2023    Copy(from_start, to_start, from_ptr-from_start, char);
2024    to_ptr = to_start + (from_ptr-from_start);
2025
2026    Newx(marks, sym_ptr->level+2, const char *);
2027    for (group=sym_ptr; group; group = group->previous)
2028        marks[group->level] = from_start + group->strbeg;
2029    marks[sym_ptr->level+1] = from_end+1;
2030    for (m = marks; *m < from_ptr; m++)
2031        *m = to_start + (*m-from_start);
2032
2033    for (;from_ptr < from_end; from_ptr++) {
2034        while (*m == from_ptr) *m++ = to_ptr;
2035        to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2036    }
2037    *to_ptr = 0;
2038
2039    while (*m == from_ptr) *m++ = to_ptr;
2040    if (m != marks + sym_ptr->level+1) {
2041        Safefree(marks);
2042        Safefree(to_start);
2043        Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, "
2044                   "level=%d", m, marks, sym_ptr->level);
2045    }
2046    for (group=sym_ptr; group; group = group->previous)
2047        group->strbeg = marks[group->level] - to_start;
2048    Safefree(marks);
2049
2050    if (SvOOK(sv)) {
2051        if (SvIVX(sv)) {
2052            SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2053            from_start -= SvIVX(sv);
2054            SvIV_set(sv, 0);
2055        }
2056        SvFLAGS(sv) &= ~SVf_OOK;
2057    }
2058    if (SvLEN(sv) != 0)
2059        Safefree(from_start);
2060    SvPV_set(sv, to_start);
2061    SvCUR_set(sv, to_ptr - to_start);
2062    SvLEN_set(sv, len);
2063    SvUTF8_on(sv);
2064}
2065
2066/* Exponential string grower. Makes string extension effectively O(n)
2067   needed says how many extra bytes we need (not counting the final '\0')
2068   Only grows the string if there is an actual lack of space
2069*/
2070STATIC char *
2071S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2072    const STRLEN cur = SvCUR(sv);
2073    const STRLEN len = SvLEN(sv);
2074    STRLEN extend;
2075
2076    PERL_ARGS_ASSERT_SV_EXP_GROW;
2077
2078    if (len - cur > needed) return SvPVX(sv);
2079    extend = needed > len ? needed : len;
2080    return SvGROW(sv, len+extend+1);
2081}
2082
2083static SV *
2084S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
2085{
2086    SvGETMAGIC(sv);
2087    if (UNLIKELY(SvAMAGIC(sv)))
2088        sv = sv_2num(sv);
2089    if (UNLIKELY(isinfnansv(sv))) {
2090        const I32 c = TYPE_NO_MODIFIERS(datumtype);
2091        const NV nv = SvNV_nomg(sv);
2092        if (c == 'w')
2093            Perl_croak(aTHX_ "Cannot compress %" NVgf " in pack", nv);
2094        else
2095            Perl_croak(aTHX_ "Cannot pack %" NVgf " with '%c'", nv, (int) c);
2096    }
2097    return sv;
2098}
2099
2100#define SvIV_no_inf(sv,d) \
2101        ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
2102#define SvUV_no_inf(sv,d) \
2103        ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
2104
2105STATIC
2106SV **
2107S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2108{
2109    tempsym_t lookahead;
2110    SSize_t items  = endlist - beglist;
2111    bool found = next_symbol(symptr);
2112    bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2113    bool warn_utf8 = ckWARN(WARN_UTF8);
2114    char* from;
2115
2116    PERL_ARGS_ASSERT_PACK_REC;
2117
2118    if (symptr->level == 0 && found && symptr->code == 'U') {
2119        marked_upgrade(aTHX_ cat, symptr);
2120        symptr->flags |= FLAG_DO_UTF8;
2121        utf8 = 0;
2122    }
2123    symptr->strbeg = SvCUR(cat);
2124
2125    while (found) {
2126        SV *fromstr;
2127        STRLEN fromlen;
2128        SSize_t len;
2129        SV *lengthcode = NULL;
2130        I32 datumtype = symptr->code;
2131        howlen_t howlen = symptr->howlen;
2132        char *start = SvPVX(cat);
2133        char *cur   = start + SvCUR(cat);
2134        bool needs_swap;
2135
2136#define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
2137#define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
2138
2139        switch (howlen) {
2140          case e_star:
2141            len = memCHRs("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2142                0 : items;
2143            break;
2144          default:
2145            /* e_no_len and e_number */
2146            len = symptr->length;
2147            break;
2148        }
2149
2150        if (len) {
2151            packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2152
2153            if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2154                /* We can process this letter. */
2155                STRLEN size = props & PACK_SIZE_MASK;
2156                GROWING2(utf8, cat, start, cur, size, (STRLEN)len);
2157            }
2158        }
2159
2160        /* Look ahead for next symbol. Do we have code/code? */
2161        lookahead = *symptr;
2162        found = next_symbol(&lookahead);
2163        if (symptr->flags & FLAG_SLASH) {
2164            IV count;
2165            if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2166            if (memCHRs("aAZ", lookahead.code)) {
2167                if (lookahead.howlen == e_number) count = lookahead.length;
2168                else {
2169                    if (items > 0) {
2170                        count = sv_len_utf8(*beglist);
2171                    }
2172                    else count = 0;
2173                    if (lookahead.code == 'Z') count++;
2174                }
2175            } else {
2176                if (lookahead.howlen == e_number && lookahead.length < items)
2177                    count = lookahead.length;
2178                else count = items;
2179            }
2180            lookahead.howlen = e_number;
2181            lookahead.length = count;
2182            lengthcode = sv_2mortal(newSViv(count));
2183        }
2184
2185        needs_swap = NEEDS_SWAP(datumtype);
2186
2187        /* Code inside the switch must take care to properly update
2188           cat (CUR length and '\0' termination) if it updated *cur and
2189           doesn't simply leave using break */
2190        switch (TYPE_NO_ENDIANNESS(datumtype)) {
2191        default:
2192            /* diag_listed_as: Invalid type '%s' in %s */
2193            Perl_croak(aTHX_ "Invalid type '%c' in pack",
2194                       (int) TYPE_NO_MODIFIERS(datumtype));
2195        case '%':
2196            Perl_croak(aTHX_ "'%%' may not be used in pack");
2197
2198        case '.' | TYPE_IS_SHRIEKING:
2199        case '.':
2200            if (howlen == e_star) from = start;
2201            else if (len == 0) from = cur;
2202            else {
2203                tempsym_t *group = symptr;
2204
2205                while (--len && group) group = group->previous;
2206                from = group ? start + group->strbeg : start;
2207            }
2208            fromstr = NEXTFROM;
2209            len = SvIV_no_inf(fromstr, datumtype);
2210            goto resize;
2211        case '@' | TYPE_IS_SHRIEKING:
2212        case '@':
2213            from = start + symptr->strbeg;
2214          resize:
2215            if (utf8  && !(datumtype & TYPE_IS_SHRIEKING))
2216                if (len >= 0) {
2217                    while (len && from < cur) {
2218                        from += UTF8SKIP(from);
2219                        len--;
2220                    }
2221                    if (from > cur)
2222                        Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2223                    if (len) {
2224                        /* Here we know from == cur */
2225                      grow:
2226                        GROWING(0, cat, start, cur, len);
2227                        Zero(cur, len, char);
2228                        cur += len;
2229                    } else if (from < cur) {
2230                        len = cur - from;
2231                        goto shrink;
2232                    } else goto no_change;
2233                } else {
2234                    cur = from;
2235                    len = -len;
2236                    goto utf8_shrink;
2237                }
2238            else {
2239                len -= cur - from;
2240                if (len > 0) goto grow;
2241                if (len == 0) goto no_change;
2242                len = -len;
2243                goto shrink;
2244            }
2245            break;
2246
2247        case '(': {
2248            tempsym_t savsym = *symptr;
2249            U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2250            symptr->flags |= group_modifiers;
2251            symptr->patend = savsym.grpend;
2252            symptr->level++;
2253            /* cppcheck-suppress autoVariables */
2254            symptr->previous = &lookahead;
2255            while (len--) {
2256                U32 was_utf8;
2257                if (utf8) symptr->flags |=  FLAG_PARSE_UTF8;
2258                else      symptr->flags &= ~FLAG_PARSE_UTF8;
2259                was_utf8 = SvUTF8(cat);
2260                symptr->patptr = savsym.grpbeg;
2261                beglist = pack_rec(cat, symptr, beglist, endlist);
2262                if (SvUTF8(cat) != was_utf8)
2263                    /* This had better be an upgrade while in utf8==0 mode */
2264                    utf8 = 1;
2265
2266                if (savsym.howlen == e_star && beglist == endlist)
2267                    break;		/* No way to continue */
2268            }
2269            items = endlist - beglist;
2270            lookahead.flags  = symptr->flags & ~group_modifiers;
2271            goto no_change;
2272        }
2273        case 'X' | TYPE_IS_SHRIEKING:
2274            if (!len)			/* Avoid division by 0 */
2275                len = 1;
2276            if (utf8) {
2277                char *hop, *last;
2278                SSize_t l = len;
2279                hop = last = start;
2280                while (hop < cur) {
2281                    hop += UTF8SKIP(hop);
2282                    if (--l == 0) {
2283                        last = hop;
2284                        l = len;
2285                    }
2286                }
2287                if (last > cur)
2288                    Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2289                cur = last;
2290                break;
2291            }
2292            len = (cur-start) % len;
2293            /* FALLTHROUGH */
2294        case 'X':
2295            if (utf8) {
2296                if (len < 1) goto no_change;
2297              utf8_shrink:
2298                while (len > 0) {
2299                    if (cur <= start)
2300                        Perl_croak(aTHX_ "'%c' outside of string in pack",
2301                                   (int) TYPE_NO_MODIFIERS(datumtype));
2302                    while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2303                        if (cur <= start)
2304                            Perl_croak(aTHX_ "'%c' outside of string in pack",
2305                                       (int) TYPE_NO_MODIFIERS(datumtype));
2306                    }
2307                    len--;
2308                }
2309            } else {
2310              shrink:
2311                if (cur - start < len)
2312                    Perl_croak(aTHX_ "'%c' outside of string in pack",
2313                               (int) TYPE_NO_MODIFIERS(datumtype));
2314                cur -= len;
2315            }
2316            if (cur < start+symptr->strbeg) {
2317                /* Make sure group starts don't point into the void */
2318                tempsym_t *group;
2319                const STRLEN length = cur-start;
2320                for (group = symptr;
2321                     group && length < group->strbeg;
2322                     group = group->previous) group->strbeg = length;
2323                lookahead.strbeg = length;
2324            }
2325            break;
2326        case 'x' | TYPE_IS_SHRIEKING: {
2327            SSize_t ai32;
2328            if (!len)			/* Avoid division by 0 */
2329                len = 1;
2330            if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2331            else      ai32 = (cur - start) % len;
2332            if (ai32 == 0) goto no_change;
2333            len -= ai32;
2334        }
2335        /* FALLTHROUGH */
2336        case 'x':
2337            goto grow;
2338        case 'A':
2339        case 'Z':
2340        case 'a': {
2341            const char *aptr;
2342
2343            fromstr = NEXTFROM;
2344            aptr = SvPV_const(fromstr, fromlen);
2345            if (DO_UTF8(fromstr)) {
2346                const char *end, *s;
2347
2348                if (!utf8 && !SvUTF8(cat)) {
2349                    marked_upgrade(aTHX_ cat, symptr);
2350                    lookahead.flags |= FLAG_DO_UTF8;
2351                    lookahead.strbeg = symptr->strbeg;
2352                    utf8 = 1;
2353                    start = SvPVX(cat);
2354                    cur = start + SvCUR(cat);
2355                }
2356                if (howlen == e_star) {
2357                    if (utf8) goto string_copy;
2358                    len = fromlen+1;
2359                }
2360                s = aptr;
2361                end = aptr + fromlen;
2362                fromlen = datumtype == 'Z' ? len-1 : len;
2363                while ((SSize_t) fromlen > 0 && s < end) {
2364                    s += UTF8SKIP(s);
2365                    fromlen--;
2366                }
2367                if (s > end)
2368                    Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2369                if (utf8) {
2370                    len = fromlen;
2371                    if (datumtype == 'Z') len++;
2372                    fromlen = s-aptr;
2373                    len += fromlen;
2374
2375                    goto string_copy;
2376                }
2377                fromlen = len - fromlen;
2378                if (datumtype == 'Z') fromlen--;
2379                if (howlen == e_star) {
2380                    len = fromlen;
2381                    if (datumtype == 'Z') len++;
2382                }
2383                GROWING(0, cat, start, cur, len);
2384                if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2385                                  datumtype | TYPE_IS_PACK))
2386                    Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
2387                               "for '%c', aptr=%p end=%p cur=%p, fromlen=%zu",
2388                               (int)datumtype, aptr, end, cur, fromlen);
2389                cur += fromlen;
2390                len -= fromlen;
2391            } else if (utf8) {
2392                if (howlen == e_star) {
2393                    len = fromlen;
2394                    if (datumtype == 'Z') len++;
2395                }
2396                if (len <= (SSize_t) fromlen) {
2397                    fromlen = len;
2398                    if (datumtype == 'Z' && fromlen > 0) fromlen--;
2399                }
2400                /* assumes a byte expands to at most UTF8_EXPAND bytes on
2401                   upgrade, so:
2402                   expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2403                GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2404                len -= fromlen;
2405                while (fromlen > 0) {
2406                    cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2407                    aptr++;
2408                    fromlen--;
2409                }
2410            } else {
2411              string_copy:
2412                if (howlen == e_star) {
2413                    len = fromlen;
2414                    if (datumtype == 'Z') len++;
2415                }
2416                if (len <= (SSize_t) fromlen) {
2417                    fromlen = len;
2418                    if (datumtype == 'Z' && fromlen > 0) fromlen--;
2419                }
2420                GROWING(0, cat, start, cur, len);
2421                Copy(aptr, cur, fromlen, char);
2422                cur += fromlen;
2423                len -= fromlen;
2424            }
2425            memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2426            cur += len;
2427            SvTAINT(cat);
2428            break;
2429        }
2430        case 'B':
2431        case 'b': {
2432            const char *str, *end;
2433            SSize_t l, field_len;
2434            U8 bits;
2435            bool utf8_source;
2436            U32 utf8_flags;
2437
2438            fromstr = NEXTFROM;
2439            str = SvPV_const(fromstr, fromlen);
2440            end = str + fromlen;
2441            if (DO_UTF8(fromstr)) {
2442                utf8_source = TRUE;
2443                utf8_flags  = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2444            } else {
2445                utf8_source = FALSE;
2446                utf8_flags  = 0; /* Unused, but keep compilers happy */
2447            }
2448            if (howlen == e_star) len = fromlen;
2449            field_len = (len+7)/8;
2450            GROWING(utf8, cat, start, cur, field_len);
2451            if (len > (SSize_t)fromlen) len = fromlen;
2452            bits = 0;
2453            l = 0;
2454            if (datumtype == 'B')
2455                while (l++ < len) {
2456                    if (utf8_source) {
2457                        UV val = 0;
2458                        NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2459                        bits |= val & 1;
2460                    } else bits |= *str++ & 1;
2461                    if (l & 7) bits <<= 1;
2462                    else {
2463                        PUSH_BYTE(utf8, cur, bits);
2464                        bits = 0;
2465                    }
2466                }
2467            else
2468                /* datumtype == 'b' */
2469                while (l++ < len) {
2470                    if (utf8_source) {
2471                        UV val = 0;
2472                        NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2473                        if (val & 1) bits |= 0x80;
2474                    } else if (*str++ & 1)
2475                        bits |= 0x80;
2476                    if (l & 7) bits >>= 1;
2477                    else {
2478                        PUSH_BYTE(utf8, cur, bits);
2479                        bits = 0;
2480                    }
2481                }
2482            l--;
2483            if (l & 7) {
2484                if (datumtype == 'B')
2485                    bits <<= 7 - (l & 7);
2486                else
2487                    bits >>= 7 - (l & 7);
2488                PUSH_BYTE(utf8, cur, bits);
2489                l += 7;
2490            }
2491            /* Determine how many chars are left in the requested field */
2492            l /= 8;
2493            if (howlen == e_star) field_len = 0;
2494            else field_len -= l;
2495            Zero(cur, field_len, char);
2496            cur += field_len;
2497            break;
2498        }
2499        case 'H':
2500        case 'h': {
2501            const char *str, *end;
2502            SSize_t l, field_len;
2503            U8 bits;
2504            bool utf8_source;
2505            U32 utf8_flags;
2506
2507            fromstr = NEXTFROM;
2508            str = SvPV_const(fromstr, fromlen);
2509            end = str + fromlen;
2510            if (DO_UTF8(fromstr)) {
2511                utf8_source = TRUE;
2512                utf8_flags  = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2513            } else {
2514                utf8_source = FALSE;
2515                utf8_flags  = 0; /* Unused, but keep compilers happy */
2516            }
2517            if (howlen == e_star) len = fromlen;
2518            field_len = (len+1)/2;
2519            GROWING(utf8, cat, start, cur, field_len);
2520            if (!utf8_source && len > (SSize_t)fromlen) len = fromlen;
2521            bits = 0;
2522            l = 0;
2523            if (datumtype == 'H')
2524                while (l++ < len) {
2525                    if (utf8_source) {
2526                        UV val = 0;
2527                        NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2528                        if (val < 256 && isALPHA(val))
2529                            bits |= (val + 9) & 0xf;
2530                        else
2531                            bits |= val & 0xf;
2532                    } else if (isALPHA(*str))
2533                        bits |= (*str++ + 9) & 0xf;
2534                    else
2535                        bits |= *str++ & 0xf;
2536                    if (l & 1) bits <<= 4;
2537                    else {
2538                        PUSH_BYTE(utf8, cur, bits);
2539                        bits = 0;
2540                    }
2541                }
2542            else
2543                while (l++ < len) {
2544                    if (utf8_source) {
2545                        UV val = 0;
2546                        NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2547                        if (val < 256 && isALPHA(val))
2548                            bits |= ((val + 9) & 0xf) << 4;
2549                        else
2550                            bits |= (val & 0xf) << 4;
2551                    } else if (isALPHA(*str))
2552                        bits |= ((*str++ + 9) & 0xf) << 4;
2553                    else
2554                        bits |= (*str++ & 0xf) << 4;
2555                    if (l & 1) bits >>= 4;
2556                    else {
2557                        PUSH_BYTE(utf8, cur, bits);
2558                        bits = 0;
2559                    }
2560                }
2561            l--;
2562            if (l & 1) {
2563                PUSH_BYTE(utf8, cur, bits);
2564                l++;
2565            }
2566            /* Determine how many chars are left in the requested field */
2567            l /= 2;
2568            if (howlen == e_star) field_len = 0;
2569            else field_len -= l;
2570            Zero(cur, field_len, char);
2571            cur += field_len;
2572            break;
2573        }
2574        case 'c':
2575            while (len-- > 0) {
2576                IV aiv;
2577                fromstr = NEXTFROM;
2578                aiv = SvIV_no_inf(fromstr, datumtype);
2579                if ((-128 > aiv || aiv > 127))
2580                    Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2581                                   "Character in 'c' format wrapped in pack");
2582                PUSH_BYTE(utf8, cur, (U8)aiv);
2583            }
2584            break;
2585        case 'C':
2586            if (len == 0) {
2587                utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2588                break;
2589            }
2590            while (len-- > 0) {
2591                IV aiv;
2592                fromstr = NEXTFROM;
2593                aiv = SvIV_no_inf(fromstr, datumtype);
2594                if ((0 > aiv || aiv > 0xff))
2595                    Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2596                                   "Character in 'C' format wrapped in pack");
2597                PUSH_BYTE(utf8, cur, (U8)aiv);
2598            }
2599            break;
2600        case 'W': {
2601            char *end;
2602            U8 in_bytes = (U8)IN_BYTES;
2603
2604            end = start+SvLEN(cat)-1;
2605            if (utf8) end -= UTF8_MAXLEN-1;
2606            while (len-- > 0) {
2607                UV auv;
2608                fromstr = NEXTFROM;
2609                auv = SvUV_no_inf(fromstr, datumtype);
2610                if (in_bytes) auv = auv % 0x100;
2611                if (utf8) {
2612                  W_utf8:
2613                    if (cur >= end) {
2614                        *cur = '\0';
2615                        SvCUR_set(cat, cur - start);
2616
2617                        GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2618                        end = start+SvLEN(cat)-UTF8_MAXLEN;
2619                    }
2620                    cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0);
2621                } else {
2622                    if (auv >= 0x100) {
2623                        if (!SvUTF8(cat)) {
2624                            *cur = '\0';
2625                            SvCUR_set(cat, cur - start);
2626                            marked_upgrade(aTHX_ cat, symptr);
2627                            lookahead.flags |= FLAG_DO_UTF8;
2628                            lookahead.strbeg = symptr->strbeg;
2629                            utf8 = 1;
2630                            start = SvPVX(cat);
2631                            cur = start + SvCUR(cat);
2632                            end = start+SvLEN(cat)-UTF8_MAXLEN;
2633                            goto W_utf8;
2634                        }
2635                        Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2636                                       "Character in 'W' format wrapped in pack");
2637                        auv = (U8) auv;
2638                    }
2639                    if (cur >= end) {
2640                        *cur = '\0';
2641                        SvCUR_set(cat, cur - start);
2642                        GROWING(0, cat, start, cur, len+1);
2643                        end = start+SvLEN(cat)-1;
2644                    }
2645                    *(U8 *) cur++ = (U8)auv;
2646                }
2647            }
2648            break;
2649        }
2650        case 'U': {
2651            char *end;
2652
2653            if (len == 0) {
2654                if (!(symptr->flags & FLAG_DO_UTF8)) {
2655                    marked_upgrade(aTHX_ cat, symptr);
2656                    lookahead.flags |= FLAG_DO_UTF8;
2657                    lookahead.strbeg = symptr->strbeg;
2658                }
2659                utf8 = 0;
2660                goto no_change;
2661            }
2662
2663            end = start+SvLEN(cat);
2664            if (!utf8) end -= UTF8_MAXLEN;
2665            while (len-- > 0) {
2666                UV auv;
2667                fromstr = NEXTFROM;
2668                auv = SvUV_no_inf(fromstr, datumtype);
2669                if (utf8) {
2670                    U8 buffer[UTF8_MAXLEN+1], *endb;
2671                    endb = uvchr_to_utf8_flags(buffer, auv, 0);
2672                    if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
2673                        *cur = '\0';
2674                        SvCUR_set(cat, cur - start);
2675                        GROWING(0, cat, start, cur,
2676                                len+(endb-buffer)*UTF8_EXPAND);
2677                        end = start+SvLEN(cat);
2678                    }
2679                    cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
2680                } else {
2681                    if (cur >= end) {
2682                        *cur = '\0';
2683                        SvCUR_set(cat, cur - start);
2684                        GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
2685                        end = start+SvLEN(cat)-UTF8_MAXLEN;
2686                    }
2687                    cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0);
2688                }
2689            }
2690            break;
2691        }
2692        /* Float and double added by gnb@melba.bby.oz.au  22/11/89 */
2693        case 'f':
2694            while (len-- > 0) {
2695                float afloat;
2696                NV anv;
2697                fromstr = NEXTFROM;
2698                anv = SvNV(fromstr);
2699# if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
2700                /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2701                 * on Alpha; fake it if we don't have them.
2702                 */
2703                if (anv > FLT_MAX)
2704                    afloat = FLT_MAX;
2705                else if (anv < -FLT_MAX)
2706                    afloat = -FLT_MAX;
2707                else afloat = (float)anv;
2708# else
2709#  if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2710                if(Perl_isnan(anv))
2711                    afloat = (float)NV_NAN;
2712                else
2713#  endif
2714#  ifdef NV_INF
2715                /* a simple cast to float is undefined if outside
2716                 * the range of values that can be represented */
2717                afloat = (float)(anv >  FLT_MAX ?  NV_INF :
2718                                 anv < -FLT_MAX ? -NV_INF : anv);
2719#  endif
2720# endif
2721                PUSH_VAR(utf8, cur, afloat, needs_swap);
2722            }
2723            break;
2724        case 'd':
2725            while (len-- > 0) {
2726                double adouble;
2727                NV anv;
2728                fromstr = NEXTFROM;
2729                anv = SvNV(fromstr);
2730# if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
2731                /* IEEE fp overflow shenanigans are unavailable on VAX and optional
2732                 * on Alpha; fake it if we don't have them.
2733                 */
2734                if (anv > DBL_MAX)
2735                    adouble = DBL_MAX;
2736                else if (anv < -DBL_MAX)
2737                    adouble = -DBL_MAX;
2738                else adouble = (double)anv;
2739# else
2740                adouble = (double)anv;
2741# endif
2742                PUSH_VAR(utf8, cur, adouble, needs_swap);
2743            }
2744            break;
2745        case 'F': {
2746            NV_bytes anv;
2747            Zero(&anv, 1, NV); /* can be long double with unused bits */
2748            while (len-- > 0) {
2749                fromstr = NEXTFROM;
2750#ifdef __GNUC__
2751                /* to work round a gcc/x86 bug; don't use SvNV */
2752                anv.nv = sv_2nv(fromstr);
2753#    if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
2754         && LONG_DOUBLESIZE > 10
2755                /* GCC sometimes overwrites the padding in the
2756                   assignment above */
2757                Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
2758#    endif
2759#else
2760                anv.nv = SvNV(fromstr);
2761#endif
2762                PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap);
2763            }
2764            break;
2765        }
2766#if defined(HAS_LONG_DOUBLE)
2767        case 'D': {
2768            ld_bytes aldouble;
2769            /* long doubles can have unused bits, which may be nonzero */
2770            Zero(&aldouble, 1, long double);
2771            while (len-- > 0) {
2772                fromstr = NEXTFROM;
2773#  ifdef __GNUC__
2774                /* to work round a gcc/x86 bug; don't use SvNV */
2775                aldouble.ld = (long double)sv_2nv(fromstr);
2776#    if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
2777                /* GCC sometimes overwrites the padding in the
2778                   assignment above */
2779                Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
2780#    endif
2781#  else
2782                aldouble.ld = (long double)SvNV(fromstr);
2783#  endif
2784                PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes),
2785                           needs_swap);
2786            }
2787            break;
2788        }
2789#endif
2790        case 'n' | TYPE_IS_SHRIEKING:
2791        case 'n':
2792            while (len-- > 0) {
2793                I16 ai16;
2794                fromstr = NEXTFROM;
2795                ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2796                ai16 = PerlSock_htons(ai16);
2797                PUSH16(utf8, cur, &ai16, FALSE);
2798            }
2799            break;
2800        case 'v' | TYPE_IS_SHRIEKING:
2801        case 'v':
2802            while (len-- > 0) {
2803                I16 ai16;
2804                fromstr = NEXTFROM;
2805                ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2806                ai16 = htovs(ai16);
2807                PUSH16(utf8, cur, &ai16, FALSE);
2808            }
2809            break;
2810        case 'S' | TYPE_IS_SHRIEKING:
2811#if SHORTSIZE != SIZE16
2812            while (len-- > 0) {
2813                unsigned short aushort;
2814                fromstr = NEXTFROM;
2815                aushort = SvUV_no_inf(fromstr, datumtype);
2816                PUSH_VAR(utf8, cur, aushort, needs_swap);
2817            }
2818            break;
2819#else
2820            /* FALLTHROUGH */
2821#endif
2822        case 'S':
2823            while (len-- > 0) {
2824                U16 au16;
2825                fromstr = NEXTFROM;
2826                au16 = (U16)SvUV_no_inf(fromstr, datumtype);
2827                PUSH16(utf8, cur, &au16, needs_swap);
2828            }
2829            break;
2830        case 's' | TYPE_IS_SHRIEKING:
2831#if SHORTSIZE != SIZE16
2832            while (len-- > 0) {
2833                short ashort;
2834                fromstr = NEXTFROM;
2835                ashort = SvIV_no_inf(fromstr, datumtype);
2836                PUSH_VAR(utf8, cur, ashort, needs_swap);
2837            }
2838            break;
2839#else
2840            /* FALLTHROUGH */
2841#endif
2842        case 's':
2843            while (len-- > 0) {
2844                I16 ai16;
2845                fromstr = NEXTFROM;
2846                ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
2847                PUSH16(utf8, cur, &ai16, needs_swap);
2848            }
2849            break;
2850        case 'I':
2851        case 'I' | TYPE_IS_SHRIEKING:
2852            while (len-- > 0) {
2853                unsigned int auint;
2854                fromstr = NEXTFROM;
2855                auint = SvUV_no_inf(fromstr, datumtype);
2856                PUSH_VAR(utf8, cur, auint, needs_swap);
2857            }
2858            break;
2859        case 'j':
2860            while (len-- > 0) {
2861                IV aiv;
2862                fromstr = NEXTFROM;
2863                aiv = SvIV_no_inf(fromstr, datumtype);
2864                PUSH_VAR(utf8, cur, aiv, needs_swap);
2865            }
2866            break;
2867        case 'J':
2868            while (len-- > 0) {
2869                UV auv;
2870                fromstr = NEXTFROM;
2871                auv = SvUV_no_inf(fromstr, datumtype);
2872                PUSH_VAR(utf8, cur, auv, needs_swap);
2873            }
2874            break;
2875        case 'w':
2876            while (len-- > 0) {
2877                NV anv;
2878                fromstr = NEXTFROM;
2879                S_sv_check_infnan(aTHX_ fromstr, datumtype);
2880                anv = SvNV_nomg(fromstr);
2881
2882                if (anv < 0) {
2883                    *cur = '\0';
2884                    SvCUR_set(cat, cur - start);
2885                    Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
2886                }
2887
2888                /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
2889                   which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
2890                   any negative IVs will have already been got by the croak()
2891                   above. IOK is untrue for fractions, so we test them
2892                   against UV_MAX_P1.  */
2893                if (SvIOK(fromstr) || anv < UV_MAX_P1) {
2894                    char   buf[(sizeof(UV)*CHAR_BIT)/7+1];
2895                    char  *in = buf + sizeof(buf);
2896                    UV     auv = SvUV_nomg(fromstr);
2897
2898                    do {
2899                        *--in = (char)((auv & 0x7f) | 0x80);
2900                        auv >>= 7;
2901                    } while (auv);
2902                    buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2903                    PUSH_GROWING_BYTES(utf8, cat, start, cur,
2904                                       in, (buf + sizeof(buf)) - in);
2905                } else if (SvPOKp(fromstr))
2906                    goto w_string;
2907                else if (SvNOKp(fromstr)) {
2908                    /* 10**NV_MAX_10_EXP is the largest power of 10
2909                       so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable
2910                       given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
2911                       x = (NV_MAX_10_EXP+1) * log (10) / log (128)
2912                       And with that many bytes only Inf can overflow.
2913                       Some C compilers are strict about integral constant
2914                       expressions so we conservatively divide by a slightly
2915                       smaller integer instead of multiplying by the exact
2916                       floating-point value.
2917                    */
2918#ifdef NV_MAX_10_EXP
2919                    /* char   buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
2920                    char   buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
2921#else
2922                    /* char   buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
2923                    char   buf[1 + (int)((308 + 1) / 2)]; /* valid C */
2924#endif
2925                    char  *in = buf + sizeof(buf);
2926
2927                    anv = Perl_floor(anv);
2928                    do {
2929                        const NV next = Perl_floor(anv / 128);
2930                        if (in <= buf)  /* this cannot happen ;-) */
2931                            Perl_croak(aTHX_ "Cannot compress integer in pack");
2932                        *--in = (unsigned char)(anv - (next * 128)) | 0x80;
2933                        anv = next;
2934                    } while (anv > 0);
2935                    buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
2936                    PUSH_GROWING_BYTES(utf8, cat, start, cur,
2937                                       in, (buf + sizeof(buf)) - in);
2938                } else {
2939                    const char     *from;
2940                    char           *result, *in;
2941                    SV             *norm;
2942                    STRLEN          len;
2943                    bool            done;
2944
2945                  w_string:
2946                    /* Copy string and check for compliance */
2947                    from = SvPV_nomg_const(fromstr, len);
2948                    if ((norm = is_an_int(from, len)) == NULL)
2949                        Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
2950
2951                    Newx(result, len, char);
2952                    in = result + len;
2953                    done = FALSE;
2954                    while (!done) *--in = div128(norm, &done) | 0x80;
2955                    result[len - 1] &= 0x7F; /* clear continue bit */
2956                    PUSH_GROWING_BYTES(utf8, cat, start, cur,
2957                                       in, (result + len) - in);
2958                    Safefree(result);
2959                    SvREFCNT_dec(norm);	/* free norm */
2960                }
2961            }
2962            break;
2963        case 'i':
2964        case 'i' | TYPE_IS_SHRIEKING:
2965            while (len-- > 0) {
2966                int aint;
2967                fromstr = NEXTFROM;
2968                aint = SvIV_no_inf(fromstr, datumtype);
2969                PUSH_VAR(utf8, cur, aint, needs_swap);
2970            }
2971            break;
2972        case 'N' | TYPE_IS_SHRIEKING:
2973        case 'N':
2974            while (len-- > 0) {
2975                U32 au32;
2976                fromstr = NEXTFROM;
2977                au32 = SvUV_no_inf(fromstr, datumtype);
2978                au32 = PerlSock_htonl(au32);
2979                PUSH32(utf8, cur, &au32, FALSE);
2980            }
2981            break;
2982        case 'V' | TYPE_IS_SHRIEKING:
2983        case 'V':
2984            while (len-- > 0) {
2985                U32 au32;
2986                fromstr = NEXTFROM;
2987                au32 = SvUV_no_inf(fromstr, datumtype);
2988                au32 = htovl(au32);
2989                PUSH32(utf8, cur, &au32, FALSE);
2990            }
2991            break;
2992        case 'L' | TYPE_IS_SHRIEKING:
2993#if LONGSIZE != SIZE32
2994            while (len-- > 0) {
2995                unsigned long aulong;
2996                fromstr = NEXTFROM;
2997                aulong = SvUV_no_inf(fromstr, datumtype);
2998                PUSH_VAR(utf8, cur, aulong, needs_swap);
2999            }
3000            break;
3001#else
3002            /* Fall though! */
3003#endif
3004        case 'L':
3005            while (len-- > 0) {
3006                U32 au32;
3007                fromstr = NEXTFROM;
3008                au32 = SvUV_no_inf(fromstr, datumtype);
3009                PUSH32(utf8, cur, &au32, needs_swap);
3010            }
3011            break;
3012        case 'l' | TYPE_IS_SHRIEKING:
3013#if LONGSIZE != SIZE32
3014            while (len-- > 0) {
3015                long along;
3016                fromstr = NEXTFROM;
3017                along = SvIV_no_inf(fromstr, datumtype);
3018                PUSH_VAR(utf8, cur, along, needs_swap);
3019            }
3020            break;
3021#else
3022            /* Fall though! */
3023#endif
3024        case 'l':
3025            while (len-- > 0) {
3026                I32 ai32;
3027                fromstr = NEXTFROM;
3028                ai32 = SvIV_no_inf(fromstr, datumtype);
3029                PUSH32(utf8, cur, &ai32, needs_swap);
3030            }
3031            break;
3032#if defined(HAS_QUAD) && IVSIZE >= 8
3033        case 'Q':
3034            while (len-- > 0) {
3035                Uquad_t auquad;
3036                fromstr = NEXTFROM;
3037                auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
3038                PUSH_VAR(utf8, cur, auquad, needs_swap);
3039            }
3040            break;
3041        case 'q':
3042            while (len-- > 0) {
3043                Quad_t aquad;
3044                fromstr = NEXTFROM;
3045                aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
3046                PUSH_VAR(utf8, cur, aquad, needs_swap);
3047            }
3048            break;
3049#endif
3050        case 'P':
3051            len = 1;		/* assume SV is correct length */
3052            GROWING(utf8, cat, start, cur, sizeof(char *));
3053            /* FALLTHROUGH */
3054        case 'p':
3055            while (len-- > 0) {
3056                const char *aptr;
3057
3058                fromstr = NEXTFROM;
3059                SvGETMAGIC(fromstr);
3060                if (!SvOK(fromstr)) aptr = NULL;
3061                else {
3062                    /* XXX better yet, could spirit away the string to
3063                     * a safe spot and hang on to it until the result
3064                     * of pack() (and all copies of the result) are
3065                     * gone.
3066                     */
3067                    if (((SvTEMP(fromstr) && SvREFCNT(fromstr) == 1)
3068                         || (SvPADTMP(fromstr) &&
3069                             !SvREADONLY(fromstr)))) {
3070                        Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3071                                       "Attempt to pack pointer to temporary value");
3072                    }
3073                    if (SvPOK(fromstr) || SvNIOK(fromstr))
3074                        aptr = SvPV_nomg_const_nolen(fromstr);
3075                    else
3076                        aptr = SvPV_force_flags_nolen(fromstr, 0);
3077                }
3078                PUSH_VAR(utf8, cur, aptr, needs_swap);
3079            }
3080            break;
3081        case 'u': {
3082            const char *aptr, *aend;
3083            bool from_utf8;
3084
3085            fromstr = NEXTFROM;
3086            if (len <= 2) len = 45;
3087            else len = len / 3 * 3;
3088            if (len >= 64) {
3089                Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3090                               "Field too wide in 'u' format in pack");
3091                len = 63;
3092            }
3093            aptr = SvPV_const(fromstr, fromlen);
3094            from_utf8 = DO_UTF8(fromstr);
3095            if (from_utf8) {
3096                aend = aptr + fromlen;
3097                fromlen = sv_len_utf8_nomg(fromstr);
3098            } else aend = NULL; /* Unused, but keep compilers happy */
3099            GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3100            while (fromlen > 0) {
3101                U8 *end;
3102                SSize_t todo;
3103                U8 hunk[1+63/3*4+1];
3104
3105                if ((SSize_t)fromlen > len)
3106                    todo = len;
3107                else
3108                    todo = fromlen;
3109                if (from_utf8) {
3110                    char buffer[64];
3111                    if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3112                                      'u' | TYPE_IS_PACK)) {
3113                        *cur = '\0';
3114                        SvCUR_set(cat, cur - start);
3115                        Perl_croak(aTHX_ "panic: string is shorter than advertised, "
3116                                   "aptr=%p, aend=%p, buffer=%p, todo=%zd",
3117                                   aptr, aend, buffer, todo);
3118                    }
3119                    end = doencodes(hunk, (const U8 *)buffer, todo);
3120                } else {
3121                    end = doencodes(hunk, (const U8 *)aptr, todo);
3122                    aptr += todo;
3123                }
3124                PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
3125                fromlen -= todo;
3126            }
3127            break;
3128        }
3129        }
3130        *cur = '\0';
3131        SvCUR_set(cat, cur - start);
3132      no_change:
3133        *symptr = lookahead;
3134    }
3135    return beglist;
3136}
3137#undef NEXTFROM
3138
3139
3140PP(pp_pack)
3141{
3142    dSP; dMARK; dORIGMARK; dTARGET;
3143    SV *cat = TARG;
3144    STRLEN fromlen;
3145    SV *pat_sv = *++MARK;
3146    const char *pat = SvPV_const(pat_sv, fromlen);
3147    const char *patend = pat + fromlen;
3148
3149    MARK++;
3150    SvPVCLEAR(cat);
3151    SvUTF8_off(cat);
3152
3153    packlist(cat, pat, patend, MARK, SP + 1);
3154
3155    if (SvUTF8(cat)) {
3156        STRLEN result_len;
3157        const char * result = SvPV_nomg(cat, result_len);
3158        const U8 * error_pos;
3159
3160        if (! is_utf8_string_loc((U8 *) result, result_len, &error_pos)) {
3161            _force_out_malformed_utf8_message(error_pos,
3162                                              (U8 *) result + result_len,
3163                                              0, /* no flags */
3164                                              1 /* Die */
3165                                            );
3166            NOT_REACHED; /* NOTREACHED */
3167        }
3168    }
3169
3170    SvSETMAGIC(cat);
3171    SP = ORIGMARK;
3172    PUSHs(cat);
3173    RETURN;
3174}
3175
3176/*
3177 * ex: set ts=8 sts=4 sw=4 et:
3178 */
3179