1/*
2  Copyright (c) 1990-2001 Info-ZIP.  All rights reserved.
3
4  See the accompanying file LICENSE, version 2000-Apr-09 or later
5  (the contents of which are also included in unzip.h) for terms of use.
6  If, for some reason, all these files are missing, the Info-ZIP license
7  also may be found at:  ftp://ftp.info-zip.org/pub/infozip/license.html
8*/
9/*---------------------------------------------------------------------------
10
11  unshrink.c                     version 1.21                     23 Nov 95
12
13
14       NOTE:  This code may or may not infringe on the so-called "Welch
15       patent" owned by Unisys.  (From reading the patent, it appears
16       that a pure LZW decompressor is *not* covered, but this claim has
17       not been tested in court, and Unisys is reported to believe other-
18       wise.)  It is therefore the responsibility of the user to acquire
19       whatever license(s) may be required for legal use of this code.
20
21       THE INFO-ZIP GROUP DISCLAIMS ALL LIABILITY FOR USE OF THIS CODE
22       IN VIOLATION OF APPLICABLE PATENT LAW.
23
24
25  Shrinking is basically a dynamic LZW algorithm with allowed code sizes of
26  up to 13 bits; in addition, there is provision for partial clearing of
27  leaf nodes.  PKWARE uses the special code 256 (decimal) to indicate a
28  change in code size or a partial clear of the code tree:  256,1 for the
29  former and 256,2 for the latter.  [Note that partial clearing can "orphan"
30  nodes:  the parent-to-be can be cleared before its new child is added,
31  but the child is added anyway (as an orphan, as though the parent still
32  existed).  When the tree fills up to the point where the parent node is
33  reused, the orphan is effectively "adopted."  Versions prior to 1.05 were
34  affected more due to greater use of pointers (to children and siblings
35  as well as parents).]
36
37  This replacement version of unshrink.c was written from scratch.  It is
38  based only on the algorithms described in Mark Nelson's _The Data Compres-
39  sion Book_ and in Terry Welch's original paper in the June 1984 issue of
40  IEEE _Computer_; no existing source code, including any in Nelson's book,
41  was used.
42
43  Memory requirements have been reduced in this version and are now no more
44  than the original Sam Smith code.  This is still larger than any of the
45  other algorithms:  at a minimum, 8K+8K+16K (stack+values+parents) assuming
46  16-bit short ints, and this does not even include the output buffer (the
47  other algorithms leave the uncompressed data in the work area, typically
48  called slide[]).  For machines with a 64KB data space this is a problem,
49  particularly when text conversion is required and line endings have more
50  than one character.  UnZip's solution is to use two roughly equal halves
51  of outbuf for the ASCII conversion in such a case; the "unshrink" argument
52  to flush() signals that this is the case.
53
54  For large-memory machines, a second outbuf is allocated for translations,
55  but only if unshrinking and only if translations are required.
56
57              | binary mode  |        text mode
58    ---------------------------------------------------
59    big mem   |  big outbuf  | big outbuf + big outbuf2  <- malloc'd here
60    small mem | small outbuf | half + half small outbuf
61
62  Copyright 1994, 1995 Greg Roelofs.  See the accompanying file "COPYING"
63  in UnZip 5.20 (or later) source or binary distributions.
64
65  ---------------------------------------------------------------------------*/
66
67
68#define __UNSHRINK_C    /* identifies this source module */
69#define UNZIP_INTERNAL
70#include "unzip.h"      /* defines LZW_CLEAN by default */
71
72
73#ifndef LZW_CLEAN
74
75static void  partial_clear  OF((__GPRO));
76
77#ifdef DEBUG
78#  define OUTDBG(c) \
79   if ((c)<32 || (c)>=127) fprintf(stderr,"\\x%02x",(c)); else putc((c),stderr);
80#else
81#  define OUTDBG(c)
82#endif
83
84/* HSIZE is defined as 2^13 (8192) in unzip.h */
85#define BOGUSCODE  256
86#define FLAG_BITS  parent        /* upper bits of parent[] used as flag bits */
87#define CODE_MASK  (HSIZE - 1)   /* 0x1fff (lower bits are parent's index) */
88#define FREE_CODE  HSIZE         /* 0x2000 (code is unused or was cleared) */
89#define HAS_CHILD  (HSIZE << 1)  /* 0x4000 (code has a child--do not clear) */
90
91#define parent G.area.shrink.Parent
92#define Value  G.area.shrink.value /* "value" conflicts with Pyramid ioctl.h */
93#define stack  G.area.shrink.Stack
94
95
96/***********************/
97/* Function unshrink() */
98/***********************/
99
100int unshrink(__G)
101     __GDEF
102{
103    int offset = (HSIZE - 1);
104    uch *stacktop = stack + offset;
105    register uch *newstr;
106    int codesize=9, len, KwKwK, error;
107    shrint code, oldcode, freecode, curcode;
108    shrint lastfreecode;
109    unsigned int outbufsiz;
110#if (defined(DLL) && !defined(NO_SLIDE_REDIR))
111    /* Normally realbuf and outbuf will be the same.  However, if the data
112     * are redirected to a large memory buffer, realbuf will point to the
113     * new location while outbuf will remain pointing to the malloc'd
114     * memory buffer. */
115    uch *realbuf = G.outbuf;
116#else
117#   define realbuf G.outbuf
118#endif
119
120
121/*---------------------------------------------------------------------------
122    Initialize various variables.
123  ---------------------------------------------------------------------------*/
124
125    lastfreecode = BOGUSCODE;
126
127#ifndef VMS     /* VMS uses its own buffer scheme for textmode flush(). */
128#ifndef SMALL_MEM
129    /* non-memory-limited machines:  allocate second (large) buffer for
130     * textmode conversion in flush(), but only if needed */
131    if (G.pInfo->textmode && !G.outbuf2 &&
132        (G.outbuf2 = (uch *)malloc(TRANSBUFSIZ)) == (uch *)NULL)
133        return PK_MEM3;
134#endif
135#endif /* !VMS */
136
137    for (code = 0;  code < BOGUSCODE;  ++code) {
138        Value[code] = (uch)code;
139        parent[code] = BOGUSCODE;
140    }
141    for (code = BOGUSCODE+1;  code < HSIZE;  ++code)
142        parent[code] = FREE_CODE;
143
144#if (defined(DLL) && !defined(NO_SLIDE_REDIR))
145    if (G.redirect_slide) { /* use normal outbuf unless we're a DLL routine */
146        realbuf = G.redirect_buffer;
147        outbufsiz = (unsigned)G.redirect_size;
148    } else
149#endif
150#ifdef DLL
151    if (G.pInfo->textmode && !G.redirect_data)
152#else
153    if (G.pInfo->textmode)
154#endif
155        outbufsiz = RAWBUFSIZ;
156    else
157        outbufsiz = OUTBUFSIZ;
158    G.outptr = realbuf;
159    G.outcnt = 0L;
160
161/*---------------------------------------------------------------------------
162    Get and output first code, then loop over remaining ones.
163  ---------------------------------------------------------------------------*/
164
165    READBITS(codesize, oldcode)
166    if (!G.zipeof) {
167        *G.outptr++ = (uch)oldcode;
168        OUTDBG((uch)oldcode)
169        ++G.outcnt;
170    }
171
172    do {
173        READBITS(codesize, code)
174        if (G.zipeof)
175            break;
176        if (code == BOGUSCODE) {   /* possible to have consecutive escapes? */
177            READBITS(codesize, code)
178            if (code == 1) {
179                ++codesize;
180                Trace((stderr, " (codesize now %d bits)\n", codesize));
181            } else if (code == 2) {
182                Trace((stderr, " (partial clear code)\n"));
183                partial_clear(__G);   /* clear leafs (nodes with no children) */
184                Trace((stderr, " (done with partial clear)\n"));
185                lastfreecode = BOGUSCODE;  /* reset start of free-node search */
186            }
187            continue;
188        }
189
190    /*-----------------------------------------------------------------------
191        Translate code:  traverse tree from leaf back to root.
192      -----------------------------------------------------------------------*/
193
194        newstr = stacktop;
195        curcode = code;
196
197        if (parent[curcode] == FREE_CODE) {
198            /* or (FLAG_BITS[curcode] & FREE_CODE)? */
199            KwKwK = TRUE;
200            Trace((stderr, " (found a KwKwK code %d; oldcode = %d)\n", code,
201              oldcode));
202            --newstr;   /* last character will be same as first character */
203            curcode = oldcode;
204        } else
205            KwKwK = FALSE;
206
207        do {
208            *newstr-- = Value[curcode];
209            curcode = (shrint)(parent[curcode] & CODE_MASK);
210        } while (curcode != BOGUSCODE);
211
212        len = (int)(stacktop - newstr++);
213        if (KwKwK)
214            *stacktop = *newstr;
215
216    /*-----------------------------------------------------------------------
217        Write expanded string in reverse order to output buffer.
218      -----------------------------------------------------------------------*/
219
220        Trace((stderr, "code %4d; oldcode %4d; char %3d (%c); string [", code,
221          oldcode, (int)(*newstr), (*newstr<32 || *newstr>=127)? ' ':*newstr));
222
223        {
224            register uch *p;
225
226            for (p = newstr;  p < newstr+len;  ++p) {
227                *G.outptr++ = *p;
228                OUTDBG(*p)
229                if (++G.outcnt == outbufsiz) {
230                    Trace((stderr, "doing flush(), outcnt = %lu\n", G.outcnt));
231                    if ((error = flush(__G__ realbuf, G.outcnt, TRUE)) != 0) {
232                        Trace((stderr, "unshrink:  flush() error (%d)\n",
233                          error));
234                        return error;
235                    }
236                    G.outptr = realbuf;
237                    G.outcnt = 0L;
238                    Trace((stderr, "done with flush()\n"));
239                }
240            }
241        }
242
243    /*-----------------------------------------------------------------------
244        Add new leaf (first character of newstr) to tree as child of oldcode.
245      -----------------------------------------------------------------------*/
246
247        /* search for freecode */
248        freecode = (shrint)(lastfreecode + 1);
249        /* add if-test before loop for speed? */
250        while (parent[freecode] != FREE_CODE)
251            ++freecode;
252        lastfreecode = freecode;
253        Trace((stderr, "]; newcode %d\n", freecode));
254
255        Value[freecode] = *newstr;
256        parent[freecode] = oldcode;
257        oldcode = code;
258
259    } while (!G.zipeof);
260
261/*---------------------------------------------------------------------------
262    Flush any remaining data and return to sender...
263  ---------------------------------------------------------------------------*/
264
265    if (G.outcnt > 0L) {
266        Trace((stderr, "doing final flush(), outcnt = %lu\n", G.outcnt));
267        if ((error = flush(__G__ realbuf, G.outcnt, TRUE)) != 0) {
268            Trace((stderr, "unshrink:  flush() error (%d)\n", error));
269            return error;
270        }
271        Trace((stderr, "done with flush()\n"));
272    }
273
274    return PK_OK;
275
276} /* end function unshrink() */
277
278
279
280
281
282/****************************/
283/* Function partial_clear() */      /* no longer recursive... */
284/****************************/
285
286static void partial_clear(__G)
287     __GDEF
288{
289    register shrint code;
290
291    /* clear all nodes which have no children (i.e., leaf nodes only) */
292
293    /* first loop:  mark each parent as such */
294    for (code = BOGUSCODE+1;  code < HSIZE;  ++code) {
295        register shrint cparent = (shrint)(parent[code] & CODE_MASK);
296
297        if (cparent > BOGUSCODE && cparent != FREE_CODE)
298            FLAG_BITS[cparent] |= HAS_CHILD;   /* set parent's child-bit */
299    }
300
301    /* second loop:  clear all nodes *not* marked as parents; reset flag bits */
302    for (code = BOGUSCODE+1;  code < HSIZE;  ++code) {
303        if (FLAG_BITS[code] & HAS_CHILD)    /* just clear child-bit */
304            FLAG_BITS[code] &= ~HAS_CHILD;
305        else {                              /* leaf:  lose it */
306            Trace((stderr, "%d\n", code));
307            parent[code] = FREE_CODE;
308        }
309    }
310
311    return;
312}
313
314#endif /* !LZW_CLEAN */
315