1/*
2  Copyright (c) 1990-2008 Info-ZIP.  All rights reserved.
3
4  See the accompanying file LICENSE, version 2000-Apr-09 or later
5  (the contents of which are also included in unzip.h) for terms of use.
6  If, for some reason, all these files are missing, the Info-ZIP license
7  also may be found at:  ftp://ftp.info-zip.org/pub/infozip/license.html
8*/
9/*---------------------------------------------------------------------------
10
11  unshrink.c                     version 1.22                     19 Mar 2008
12
13
14       NOTE:  This code may or may not infringe on the so-called "Welch
15       patent" owned by Unisys.  (From reading the patent, it appears
16       that a pure LZW decompressor is *not* covered, but this claim has
17       not been tested in court, and Unisys is reported to believe other-
18       wise.)  It is therefore the responsibility of the user to acquire
19       whatever license(s) may be required for legal use of this code.
20
21       THE INFO-ZIP GROUP DISCLAIMS ALL LIABILITY FOR USE OF THIS CODE
22       IN VIOLATION OF APPLICABLE PATENT LAW.
23
24
25  Shrinking is basically a dynamic LZW algorithm with allowed code sizes of
26  up to 13 bits; in addition, there is provision for partial clearing of
27  leaf nodes.  PKWARE uses the special code 256 (decimal) to indicate a
28  change in code size or a partial clear of the code tree:  256,1 for the
29  former and 256,2 for the latter.  [Note that partial clearing can "orphan"
30  nodes:  the parent-to-be can be cleared before its new child is added,
31  but the child is added anyway (as an orphan, as though the parent still
32  existed).  When the tree fills up to the point where the parent node is
33  reused, the orphan is effectively "adopted."  Versions prior to 1.05 were
34  affected more due to greater use of pointers (to children and siblings
35  as well as parents).]
36
37  This replacement version of unshrink.c was written from scratch.  It is
38  based only on the algorithms described in Mark Nelson's _The Data Compres-
39  sion Book_ and in Terry Welch's original paper in the June 1984 issue of
40  IEEE _Computer_; no existing source code, including any in Nelson's book,
41  was used.
42
43  Memory requirements have been reduced in this version and are now no more
44  than the original Sam Smith code.  This is still larger than any of the
45  other algorithms:  at a minimum, 8K+8K+16K (stack+values+parents) assuming
46  16-bit short ints, and this does not even include the output buffer (the
47  other algorithms leave the uncompressed data in the work area, typically
48  called slide[]).  For machines with a 64KB data space this is a problem,
49  particularly when text conversion is required and line endings have more
50  than one character.  UnZip's solution is to use two roughly equal halves
51  of outbuf for the ASCII conversion in such a case; the "unshrink" argument
52  to flush() signals that this is the case.
53
54  For large-memory machines, a second outbuf is allocated for translations,
55  but only if unshrinking and only if translations are required.
56
57              | binary mode  |        text mode
58    ---------------------------------------------------
59    big mem   |  big outbuf  | big outbuf + big outbuf2  <- malloc'd here
60    small mem | small outbuf | half + half small outbuf
61
62  Copyright 1994, 1995 Greg Roelofs.  See the accompanying file "COPYING"
63  in UnZip 5.20 (or later) source or binary distributions.
64
65  ---------------------------------------------------------------------------*/
66
67
68#define __UNSHRINK_C    /* identifies this source module */
69#define UNZIP_INTERNAL
70#include "unzip.h"
71
72
73#ifndef LZW_CLEAN
74
75static void  partial_clear  OF((__GPRO__ int lastcodeused));
76
77#ifdef DEBUG
78#  define OUTDBG(c) \
79   if ((c)<32 || (c)>=127) fprintf(stderr,"\\x%02x",(c)); else putc((c),stderr);
80#else
81#  define OUTDBG(c)
82#endif
83
84/* HSIZE is defined as 2^13 (8192) in unzip.h (resp. unzpriv.h */
85#define BOGUSCODE  256
86#define FLAG_BITS  parent        /* upper bits of parent[] used as flag bits */
87#define CODE_MASK  (HSIZE - 1)   /* 0x1fff (lower bits are parent's index) */
88#define FREE_CODE  HSIZE         /* 0x2000 (code is unused or was cleared) */
89#define HAS_CHILD  (HSIZE << 1)  /* 0x4000 (code has a child--do not clear) */
90
91#define parent G.area.shrink.Parent
92#define Value  G.area.shrink.value /* "value" conflicts with Pyramid ioctl.h */
93#define stack  G.area.shrink.Stack
94
95
96/***********************/
97/* Function unshrink() */
98/***********************/
99
100int unshrink(__G)
101     __GDEF
102{
103    uch *stacktop = stack + (HSIZE - 1);
104    register uch *newstr;
105    uch finalval;
106    int codesize=9, len, error;
107    shrint code, oldcode, curcode;
108    shrint lastfreecode;
109    unsigned int outbufsiz;
110#if (defined(DLL) && !defined(NO_SLIDE_REDIR))
111    /* Normally realbuf and outbuf will be the same.  However, if the data
112     * are redirected to a large memory buffer, realbuf will point to the
113     * new location while outbuf will remain pointing to the malloc'd
114     * memory buffer. */
115    uch *realbuf = G.outbuf;
116#else
117#   define realbuf G.outbuf
118#endif
119
120
121/*---------------------------------------------------------------------------
122    Initialize various variables.
123  ---------------------------------------------------------------------------*/
124
125    lastfreecode = BOGUSCODE;
126
127#ifndef VMS     /* VMS uses its own buffer scheme for textmode flush(). */
128#ifndef SMALL_MEM
129    /* non-memory-limited machines:  allocate second (large) buffer for
130     * textmode conversion in flush(), but only if needed */
131    if (G.pInfo->textmode && !G.outbuf2 &&
132        (G.outbuf2 = (uch *)malloc(TRANSBUFSIZ)) == (uch *)NULL)
133        return PK_MEM3;
134#endif
135#endif /* !VMS */
136
137    for (code = 0;  code < BOGUSCODE;  ++code) {
138        Value[code] = (uch)code;
139        parent[code] = BOGUSCODE;
140    }
141    for (code = BOGUSCODE+1;  code < HSIZE;  ++code)
142        parent[code] = FREE_CODE;
143
144#if (defined(DLL) && !defined(NO_SLIDE_REDIR))
145    if (G.redirect_slide) { /* use normal outbuf unless we're a DLL routine */
146        realbuf = G.redirect_buffer;
147        outbufsiz = (unsigned)G.redirect_size;
148    } else
149#endif
150#ifdef DLL
151    if (G.pInfo->textmode && !G.redirect_data)
152#else
153    if (G.pInfo->textmode)
154#endif
155        outbufsiz = RAWBUFSIZ;
156    else
157        outbufsiz = OUTBUFSIZ;
158    G.outptr = realbuf;
159    G.outcnt = 0L;
160
161/*---------------------------------------------------------------------------
162    Get and output first code, then loop over remaining ones.
163  ---------------------------------------------------------------------------*/
164
165    READBITS(codesize, oldcode)
166    if (G.zipeof)
167        return PK_OK;
168
169    finalval = (uch)oldcode;
170    OUTDBG(finalval)
171    *G.outptr++ = finalval;
172    ++G.outcnt;
173
174    while (TRUE) {
175        READBITS(codesize, code)
176        if (G.zipeof)
177            break;
178        if (code == BOGUSCODE) {   /* possible to have consecutive escapes? */
179            READBITS(codesize, code)
180            if (G.zipeof)
181                break;
182            if (code == 1) {
183                ++codesize;
184                Trace((stderr, " (codesize now %d bits)\n", codesize));
185                if (codesize > MAX_BITS) return PK_ERR;
186            } else if (code == 2) {
187                Trace((stderr, " (partial clear code)\n"));
188                /* clear leafs (nodes with no children) */
189                partial_clear(__G__ lastfreecode);
190                Trace((stderr, " (done with partial clear)\n"));
191                lastfreecode = BOGUSCODE; /* reset start of free-node search */
192            }
193            continue;
194        }
195
196    /*-----------------------------------------------------------------------
197        Translate code:  traverse tree from leaf back to root.
198      -----------------------------------------------------------------------*/
199
200        newstr = stacktop;
201        curcode = code;
202
203        if (parent[code] == FREE_CODE) {
204            /* or (FLAG_BITS[code] & FREE_CODE)? */
205            Trace((stderr, " (found a KwKwK code %d; oldcode = %d)\n", code,
206              oldcode));
207            *newstr-- = finalval;
208            code = oldcode;
209        }
210
211        while (code != BOGUSCODE) {
212            if (newstr < stack) {
213                /* Bogus compression stream caused buffer underflow! */
214                Trace((stderr, "unshrink stack overflow!\n"));
215                return PK_ERR;
216            }
217            if (parent[code] == FREE_CODE) {
218                /* or (FLAG_BITS[code] & FREE_CODE)? */
219                Trace((stderr, " (found a KwKwK code %d; oldcode = %d)\n",
220                  code, oldcode));
221                *newstr-- = finalval;
222                code = oldcode;
223            } else {
224                *newstr-- = Value[code];
225                code = (shrint)(parent[code] & CODE_MASK);
226            }
227        }
228
229        len = (int)(stacktop - newstr++);
230        finalval = *newstr;
231
232    /*-----------------------------------------------------------------------
233        Write expanded string in reverse order to output buffer.
234      -----------------------------------------------------------------------*/
235
236        Trace((stderr,
237          "code %4d; oldcode %4d; char %3d (%c); len %d; string [", curcode,
238          oldcode, (int)(*newstr), (*newstr<32 || *newstr>=127)? ' ':*newstr,
239          len));
240
241        {
242            register uch *p;
243
244            for (p = newstr;  p < newstr+len;  ++p) {
245                *G.outptr++ = *p;
246                OUTDBG(*p)
247                if (++G.outcnt == outbufsiz) {
248                    Trace((stderr, "doing flush(), outcnt = %lu\n", G.outcnt));
249                    if ((error = flush(__G__ realbuf, G.outcnt, TRUE)) != 0) {
250                        Trace((stderr, "unshrink:  flush() error (%d)\n",
251                          error));
252                        return error;
253                    }
254                    G.outptr = realbuf;
255                    G.outcnt = 0L;
256                    Trace((stderr, "done with flush()\n"));
257                }
258            }
259        }
260
261    /*-----------------------------------------------------------------------
262        Add new leaf (first character of newstr) to tree as child of oldcode.
263      -----------------------------------------------------------------------*/
264
265        /* search for freecode */
266        code = (shrint)(lastfreecode + 1);
267        /* add if-test before loop for speed? */
268        while ((code < HSIZE) && (parent[code] != FREE_CODE))
269            ++code;
270        lastfreecode = code;
271        Trace((stderr, "]; newcode %d\n", code));
272        if (code >= HSIZE)
273            /* invalid compressed data caused max-code overflow! */
274            return PK_ERR;
275
276        Value[code] = finalval;
277        parent[code] = oldcode;
278        oldcode = curcode;
279
280    }
281
282/*---------------------------------------------------------------------------
283    Flush any remaining data and return to sender...
284  ---------------------------------------------------------------------------*/
285
286    if (G.outcnt > 0L) {
287        Trace((stderr, "doing final flush(), outcnt = %lu\n", G.outcnt));
288        if ((error = flush(__G__ realbuf, G.outcnt, TRUE)) != 0) {
289            Trace((stderr, "unshrink:  flush() error (%d)\n", error));
290            return error;
291        }
292        Trace((stderr, "done with flush()\n"));
293    }
294
295    return PK_OK;
296
297} /* end function unshrink() */
298
299
300
301
302
303/****************************/
304/* Function partial_clear() */      /* no longer recursive... */
305/****************************/
306
307static void partial_clear(__G__ lastcodeused)
308    __GDEF
309    int lastcodeused;
310{
311    register shrint code;
312
313    /* clear all nodes which have no children (i.e., leaf nodes only) */
314
315    /* first loop:  mark each parent as such */
316    for (code = BOGUSCODE+1;  code <= lastcodeused;  ++code) {
317        register shrint cparent = (shrint)(parent[code] & CODE_MASK);
318
319        if (cparent > BOGUSCODE)
320            FLAG_BITS[cparent] |= HAS_CHILD;   /* set parent's child-bit */
321    }
322
323    /* second loop:  clear all nodes *not* marked as parents; reset flag bits */
324    for (code = BOGUSCODE+1;  code <= lastcodeused;  ++code) {
325        if (FLAG_BITS[code] & HAS_CHILD)    /* just clear child-bit */
326            FLAG_BITS[code] &= ~HAS_CHILD;
327        else {                              /* leaf:  lose it */
328            Trace((stderr, "%d\n", code));
329            parent[code] = FREE_CODE;
330        }
331    }
332
333    return;
334}
335
336#endif /* !LZW_CLEAN */
337