1/* 2 Copyright (c) 1990-2008 Info-ZIP. All rights reserved. 3 4 See the accompanying file LICENSE, version 2000-Apr-09 or later 5 (the contents of which are also included in unzip.h) for terms of use. 6 If, for some reason, all these files are missing, the Info-ZIP license 7 also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html 8*/ 9/*--------------------------------------------------------------------------- 10 11 unshrink.c version 1.22 19 Mar 2008 12 13 14 NOTE: This code may or may not infringe on the so-called "Welch 15 patent" owned by Unisys. (From reading the patent, it appears 16 that a pure LZW decompressor is *not* covered, but this claim has 17 not been tested in court, and Unisys is reported to believe other- 18 wise.) It is therefore the responsibility of the user to acquire 19 whatever license(s) may be required for legal use of this code. 20 21 THE INFO-ZIP GROUP DISCLAIMS ALL LIABILITY FOR USE OF THIS CODE 22 IN VIOLATION OF APPLICABLE PATENT LAW. 23 24 25 Shrinking is basically a dynamic LZW algorithm with allowed code sizes of 26 up to 13 bits; in addition, there is provision for partial clearing of 27 leaf nodes. PKWARE uses the special code 256 (decimal) to indicate a 28 change in code size or a partial clear of the code tree: 256,1 for the 29 former and 256,2 for the latter. [Note that partial clearing can "orphan" 30 nodes: the parent-to-be can be cleared before its new child is added, 31 but the child is added anyway (as an orphan, as though the parent still 32 existed). When the tree fills up to the point where the parent node is 33 reused, the orphan is effectively "adopted." Versions prior to 1.05 were 34 affected more due to greater use of pointers (to children and siblings 35 as well as parents).] 36 37 This replacement version of unshrink.c was written from scratch. It is 38 based only on the algorithms described in Mark Nelson's _The Data Compres- 39 sion Book_ and in Terry Welch's original paper in the June 1984 issue of 40 IEEE _Computer_; no existing source code, including any in Nelson's book, 41 was used. 42 43 Memory requirements have been reduced in this version and are now no more 44 than the original Sam Smith code. This is still larger than any of the 45 other algorithms: at a minimum, 8K+8K+16K (stack+values+parents) assuming 46 16-bit short ints, and this does not even include the output buffer (the 47 other algorithms leave the uncompressed data in the work area, typically 48 called slide[]). For machines with a 64KB data space this is a problem, 49 particularly when text conversion is required and line endings have more 50 than one character. UnZip's solution is to use two roughly equal halves 51 of outbuf for the ASCII conversion in such a case; the "unshrink" argument 52 to flush() signals that this is the case. 53 54 For large-memory machines, a second outbuf is allocated for translations, 55 but only if unshrinking and only if translations are required. 56 57 | binary mode | text mode 58 --------------------------------------------------- 59 big mem | big outbuf | big outbuf + big outbuf2 <- malloc'd here 60 small mem | small outbuf | half + half small outbuf 61 62 Copyright 1994, 1995 Greg Roelofs. See the accompanying file "COPYING" 63 in UnZip 5.20 (or later) source or binary distributions. 64 65 ---------------------------------------------------------------------------*/ 66 67 68#define __UNSHRINK_C /* identifies this source module */ 69#define UNZIP_INTERNAL 70#include "unzip.h" 71 72 73#ifndef LZW_CLEAN 74 75static void partial_clear OF((__GPRO__ int lastcodeused)); 76 77#ifdef DEBUG 78# define OUTDBG(c) \ 79 if ((c)<32 || (c)>=127) fprintf(stderr,"\\x%02x",(c)); else putc((c),stderr); 80#else 81# define OUTDBG(c) 82#endif 83 84/* HSIZE is defined as 2^13 (8192) in unzip.h (resp. unzpriv.h */ 85#define BOGUSCODE 256 86#define FLAG_BITS parent /* upper bits of parent[] used as flag bits */ 87#define CODE_MASK (HSIZE - 1) /* 0x1fff (lower bits are parent's index) */ 88#define FREE_CODE HSIZE /* 0x2000 (code is unused or was cleared) */ 89#define HAS_CHILD (HSIZE << 1) /* 0x4000 (code has a child--do not clear) */ 90 91#define parent G.area.shrink.Parent 92#define Value G.area.shrink.value /* "value" conflicts with Pyramid ioctl.h */ 93#define stack G.area.shrink.Stack 94 95 96/***********************/ 97/* Function unshrink() */ 98/***********************/ 99 100int unshrink(__G) 101 __GDEF 102{ 103 uch *stacktop = stack + (HSIZE - 1); 104 register uch *newstr; 105 uch finalval; 106 int codesize=9, len, error; 107 shrint code, oldcode, curcode; 108 shrint lastfreecode; 109 unsigned int outbufsiz; 110#if (defined(DLL) && !defined(NO_SLIDE_REDIR)) 111 /* Normally realbuf and outbuf will be the same. However, if the data 112 * are redirected to a large memory buffer, realbuf will point to the 113 * new location while outbuf will remain pointing to the malloc'd 114 * memory buffer. */ 115 uch *realbuf = G.outbuf; 116#else 117# define realbuf G.outbuf 118#endif 119 120 121/*--------------------------------------------------------------------------- 122 Initialize various variables. 123 ---------------------------------------------------------------------------*/ 124 125 lastfreecode = BOGUSCODE; 126 127#ifndef VMS /* VMS uses its own buffer scheme for textmode flush(). */ 128#ifndef SMALL_MEM 129 /* non-memory-limited machines: allocate second (large) buffer for 130 * textmode conversion in flush(), but only if needed */ 131 if (G.pInfo->textmode && !G.outbuf2 && 132 (G.outbuf2 = (uch *)malloc(TRANSBUFSIZ)) == (uch *)NULL) 133 return PK_MEM3; 134#endif 135#endif /* !VMS */ 136 137 for (code = 0; code < BOGUSCODE; ++code) { 138 Value[code] = (uch)code; 139 parent[code] = BOGUSCODE; 140 } 141 for (code = BOGUSCODE+1; code < HSIZE; ++code) 142 parent[code] = FREE_CODE; 143 144#if (defined(DLL) && !defined(NO_SLIDE_REDIR)) 145 if (G.redirect_slide) { /* use normal outbuf unless we're a DLL routine */ 146 realbuf = G.redirect_buffer; 147 outbufsiz = (unsigned)G.redirect_size; 148 } else 149#endif 150#ifdef DLL 151 if (G.pInfo->textmode && !G.redirect_data) 152#else 153 if (G.pInfo->textmode) 154#endif 155 outbufsiz = RAWBUFSIZ; 156 else 157 outbufsiz = OUTBUFSIZ; 158 G.outptr = realbuf; 159 G.outcnt = 0L; 160 161/*--------------------------------------------------------------------------- 162 Get and output first code, then loop over remaining ones. 163 ---------------------------------------------------------------------------*/ 164 165 READBITS(codesize, oldcode) 166 if (G.zipeof) 167 return PK_OK; 168 169 finalval = (uch)oldcode; 170 OUTDBG(finalval) 171 *G.outptr++ = finalval; 172 ++G.outcnt; 173 174 while (TRUE) { 175 READBITS(codesize, code) 176 if (G.zipeof) 177 break; 178 if (code == BOGUSCODE) { /* possible to have consecutive escapes? */ 179 READBITS(codesize, code) 180 if (G.zipeof) 181 break; 182 if (code == 1) { 183 ++codesize; 184 Trace((stderr, " (codesize now %d bits)\n", codesize)); 185 if (codesize > MAX_BITS) return PK_ERR; 186 } else if (code == 2) { 187 Trace((stderr, " (partial clear code)\n")); 188 /* clear leafs (nodes with no children) */ 189 partial_clear(__G__ lastfreecode); 190 Trace((stderr, " (done with partial clear)\n")); 191 lastfreecode = BOGUSCODE; /* reset start of free-node search */ 192 } 193 continue; 194 } 195 196 /*----------------------------------------------------------------------- 197 Translate code: traverse tree from leaf back to root. 198 -----------------------------------------------------------------------*/ 199 200 newstr = stacktop; 201 curcode = code; 202 203 if (parent[code] == FREE_CODE) { 204 /* or (FLAG_BITS[code] & FREE_CODE)? */ 205 Trace((stderr, " (found a KwKwK code %d; oldcode = %d)\n", code, 206 oldcode)); 207 *newstr-- = finalval; 208 code = oldcode; 209 } 210 211 while (code != BOGUSCODE) { 212 if (newstr < stack) { 213 /* Bogus compression stream caused buffer underflow! */ 214 Trace((stderr, "unshrink stack overflow!\n")); 215 return PK_ERR; 216 } 217 if (parent[code] == FREE_CODE) { 218 /* or (FLAG_BITS[code] & FREE_CODE)? */ 219 Trace((stderr, " (found a KwKwK code %d; oldcode = %d)\n", 220 code, oldcode)); 221 *newstr-- = finalval; 222 code = oldcode; 223 } else { 224 *newstr-- = Value[code]; 225 code = (shrint)(parent[code] & CODE_MASK); 226 } 227 } 228 229 len = (int)(stacktop - newstr++); 230 finalval = *newstr; 231 232 /*----------------------------------------------------------------------- 233 Write expanded string in reverse order to output buffer. 234 -----------------------------------------------------------------------*/ 235 236 Trace((stderr, 237 "code %4d; oldcode %4d; char %3d (%c); len %d; string [", curcode, 238 oldcode, (int)(*newstr), (*newstr<32 || *newstr>=127)? ' ':*newstr, 239 len)); 240 241 { 242 register uch *p; 243 244 for (p = newstr; p < newstr+len; ++p) { 245 *G.outptr++ = *p; 246 OUTDBG(*p) 247 if (++G.outcnt == outbufsiz) { 248 Trace((stderr, "doing flush(), outcnt = %lu\n", G.outcnt)); 249 if ((error = flush(__G__ realbuf, G.outcnt, TRUE)) != 0) { 250 Trace((stderr, "unshrink: flush() error (%d)\n", 251 error)); 252 return error; 253 } 254 G.outptr = realbuf; 255 G.outcnt = 0L; 256 Trace((stderr, "done with flush()\n")); 257 } 258 } 259 } 260 261 /*----------------------------------------------------------------------- 262 Add new leaf (first character of newstr) to tree as child of oldcode. 263 -----------------------------------------------------------------------*/ 264 265 /* search for freecode */ 266 code = (shrint)(lastfreecode + 1); 267 /* add if-test before loop for speed? */ 268 while ((code < HSIZE) && (parent[code] != FREE_CODE)) 269 ++code; 270 lastfreecode = code; 271 Trace((stderr, "]; newcode %d\n", code)); 272 if (code >= HSIZE) 273 /* invalid compressed data caused max-code overflow! */ 274 return PK_ERR; 275 276 Value[code] = finalval; 277 parent[code] = oldcode; 278 oldcode = curcode; 279 280 } 281 282/*--------------------------------------------------------------------------- 283 Flush any remaining data and return to sender... 284 ---------------------------------------------------------------------------*/ 285 286 if (G.outcnt > 0L) { 287 Trace((stderr, "doing final flush(), outcnt = %lu\n", G.outcnt)); 288 if ((error = flush(__G__ realbuf, G.outcnt, TRUE)) != 0) { 289 Trace((stderr, "unshrink: flush() error (%d)\n", error)); 290 return error; 291 } 292 Trace((stderr, "done with flush()\n")); 293 } 294 295 return PK_OK; 296 297} /* end function unshrink() */ 298 299 300 301 302 303/****************************/ 304/* Function partial_clear() */ /* no longer recursive... */ 305/****************************/ 306 307static void partial_clear(__G__ lastcodeused) 308 __GDEF 309 int lastcodeused; 310{ 311 register shrint code; 312 313 /* clear all nodes which have no children (i.e., leaf nodes only) */ 314 315 /* first loop: mark each parent as such */ 316 for (code = BOGUSCODE+1; code <= lastcodeused; ++code) { 317 register shrint cparent = (shrint)(parent[code] & CODE_MASK); 318 319 if (cparent > BOGUSCODE) 320 FLAG_BITS[cparent] |= HAS_CHILD; /* set parent's child-bit */ 321 } 322 323 /* second loop: clear all nodes *not* marked as parents; reset flag bits */ 324 for (code = BOGUSCODE+1; code <= lastcodeused; ++code) { 325 if (FLAG_BITS[code] & HAS_CHILD) /* just clear child-bit */ 326 FLAG_BITS[code] &= ~HAS_CHILD; 327 else { /* leaf: lose it */ 328 Trace((stderr, "%d\n", code)); 329 parent[code] = FREE_CODE; 330 } 331 } 332 333 return; 334} 335 336#endif /* !LZW_CLEAN */ 337