1/*	$NetBSD$	*/
2
3/* gun.c -- simple gunzip to give an example of the use of inflateBack()
4 * Copyright (C) 2003, 2005 Mark Adler
5 * For conditions of distribution and use, see copyright notice in zlib.h
6   Version 1.3  12 June 2005  Mark Adler */
7
8/* Version history:
9   1.0  16 Feb 2003  First version for testing of inflateBack()
10   1.1  21 Feb 2005  Decompress concatenated gzip streams
11                     Remove use of "this" variable (C++ keyword)
12                     Fix return value for in()
13                     Improve allocation failure checking
14                     Add typecasting for void * structures
15                     Add -h option for command version and usage
16                     Add a bunch of comments
17   1.2  20 Mar 2005  Add Unix compress (LZW) decompression
18                     Copy file attributes from input file to output file
19   1.3  12 Jun 2005  Add casts for error messages [Oberhumer]
20 */
21
22/*
23   gun [ -t ] [ name ... ]
24
25   decompresses the data in the named gzip files.  If no arguments are given,
26   gun will decompress from stdin to stdout.  The names must end in .gz, -gz,
27   .z, -z, _z, or .Z.  The uncompressed data will be written to a file name
28   with the suffix stripped.  On success, the original file is deleted.  On
29   failure, the output file is deleted.  For most failures, the command will
30   continue to process the remaining names on the command line.  A memory
31   allocation failure will abort the command.  If -t is specified, then the
32   listed files or stdin will be tested as gzip files for integrity (without
33   checking for a proper suffix), no output will be written, and no files
34   will be deleted.
35
36   Like gzip, gun allows concatenated gzip streams and will decompress them,
37   writing all of the uncompressed data to the output.  Unlike gzip, gun allows
38   an empty file on input, and will produce no error writing an empty output
39   file.
40
41   gun will also decompress files made by Unix compress, which uses LZW
42   compression.  These files are automatically detected by virtue of their
   magic header bytes.  Since the end of a Unix compress stream is marked by
   the end-of-file, such streams cannot be concatenated.  If a Unix compress
   stream is encountered in an input file, it is the last stream in that file.
46
   Like gunzip and uncompress, the file attributes of the original compressed
48   file are maintained in the final uncompressed file, to the extent that the
49   user permissions allow it.
50
51   On my Mac OS X PowerPC G4, gun is almost twice as fast as gunzip (version
52   1.2.4) is on the same file, when gun is linked with zlib 1.2.2.  Also the
53   LZW decompression provided by gun is about twice as fast as the standard
54   Unix uncompress command.
55 */
56
57/* external functions and related types and constants */
58#include <stdio.h>          /* fprintf() */
59#include <stdlib.h>         /* malloc(), free() */
60#include <string.h>         /* strerror(), strcmp(), strlen(), memcpy() */
61#include <errno.h>          /* errno */
62#include <fcntl.h>          /* open() */
63#include <unistd.h>         /* read(), write(), close(), chown(), unlink() */
64#include <sys/types.h>
65#include <sys/stat.h>       /* stat(), chmod() */
66#include <utime.h>          /* utime() */
67#include "zlib.h"           /* inflateBackInit(), inflateBack(), */
68                            /* inflateBackEnd(), crc32() */
69
/* function declaration -- "local" marks a definition as private to this file
   (it expands to the static storage class) */
#define local static

/* buffer constants */
#define SIZE 32768U         /* input and output buffer sizes, in bytes */
#define PIECE 16384         /* limits i/o chunks for 16-bit int case: the most
                               that in() and out() request from a single
                               read() or write() call */
76
/* structure for infback() to pass to input function in() -- it maintains the
   input file and a buffer of size SIZE */
struct ind {
    int infile;                 /* file descriptor that in() reads from */
    unsigned char *inbuf;       /* buffer that in() fills, up to SIZE bytes */
};
83
84/* Load input buffer, assumed to be empty, and return bytes loaded and a
85   pointer to them.  read() is called until the buffer is full, or until it
86   returns end-of-file or error.  Return 0 on error. */
87local unsigned in(void *in_desc, unsigned char **buf)
88{
89    int ret;
90    unsigned len;
91    unsigned char *next;
92    struct ind *me = (struct ind *)in_desc;
93
94    next = me->inbuf;
95    *buf = next;
96    len = 0;
97    do {
98        ret = PIECE;
99        if ((unsigned)ret > SIZE - len)
100            ret = (int)(SIZE - len);
101        ret = (int)read(me->infile, next, ret);
102        if (ret == -1) {
103            len = 0;
104            break;
105        }
106        next += ret;
107        len += ret;
108    } while (ret != 0 && len < SIZE);
109    return len;
110}
111
/* structure for infback() to pass to output function out() -- it maintains the
   output file, a running CRC-32 check on the output and the total number of
   bytes output, both for checking against the gzip trailer.  (The length in
   the gzip trailer is stored modulo 2^32, so it's ok if a long is 32 bits and
   the output is greater than 4 GB.) */
struct outd {
    int outfile;                /* output file descriptor, or -1 to discard */
    int check;                  /* true if checking crc and total */
    unsigned long crc;          /* running CRC-32, updated only if check */
    unsigned long total;        /* running byte count, updated only if check */
};
123
124/* Write output buffer and update the CRC-32 and total bytes written.  write()
125   is called until all of the output is written or an error is encountered.
126   On success out() returns 0.  For a write failure, out() returns 1.  If the
127   output file descriptor is -1, then nothing is written.
128 */
129local int out(void *out_desc, unsigned char *buf, unsigned len)
130{
131    int ret;
132    struct outd *me = (struct outd *)out_desc;
133
134    if (me->check) {
135        me->crc = crc32(me->crc, buf, len);
136        me->total += len;
137    }
138    if (me->outfile != -1)
139        do {
140            ret = PIECE;
141            if ((unsigned)ret > len)
142                ret = (int)len;
143            ret = (int)write(me->outfile, buf, ret);
144            if (ret == -1)
145                return 1;
146            buf += ret;
147            len -= ret;
148        } while (len != 0);
149    return 0;
150}
151
/* next input byte macro for use inside lunpipe() and gunpipe() -- it expects
   the caller to have these names in scope: have (count of buffered bytes),
   next (pointer to the next buffered byte), last, and indp (the struct ind
   passed to in()).  If no bytes are buffered, in() is called to refill the
   buffer.  The byte fetched is stored in last and is also the value of the
   macro; both are -1 if in() delivered nothing (end of file or read error). */
#define NEXT() (have ? 0 : (have = in(indp, &next)), \
                last = have ? (have--, (int)(*next++)) : -1)
155
/* memory for gunpipe() and lunpipe() --
   the first 256 entries of prefix[] and suffix[] are never used, could
   have offset the index, but it's faster to waste the memory.  match[] does
   double duty: lunpipe() builds each decoded string in it in reverse order,
   and main() hands it to inflateBackInit() as the sliding window. */
unsigned char inbuf[SIZE];              /* input buffer */
unsigned char outbuf[SIZE];             /* output buffer */
unsigned short prefix[65536];           /* index to LZW prefix string */
unsigned char suffix[65536];            /* one-character LZW suffix */
unsigned char match[65280 + 2];         /* buffer for reversed match or gzip
                                           32K sliding window */
165
/* throw out what's left in the current bits byte buffer (this is a vestigial
   aspect of the compressed data format derived from an implementation that
   made use of a special VAX machine instruction!)

   The macro works on the caller's locals: it clears the leftover bits (left
   and rem) and skips the chunk bytes that remain in the current group of
   codes.  If fewer than chunk bytes are buffered, the buffered ones are
   dropped and NEXT() is used once to refill the buffer; if the input ends
   there, or the refilled buffer still cannot cover the rest of the chunk, the
   macro gives up early (in the latter case with chunk and have both zeroed).
   Otherwise have and next are advanced past the chunk and chunk is set to
   zero. */
#define FLUSHCODE() \
    do { \
        left = 0; \
        rem = 0; \
        if (chunk > have) { \
            chunk -= have; \
            have = 0; \
            if (NEXT() == -1) \
                break; \
            chunk--; \
            if (chunk > have) { \
                chunk = have = 0; \
                break; \
            } \
        } \
        have -= chunk; \
        next += chunk; \
        chunk = 0; \
    } while (0)
188
189/* Decompress a compress (LZW) file from indp to outfile.  The compress magic
190   header (two bytes) has already been read and verified.  There are have bytes
191   of buffered input at next.  strm is used for passing error information back
192   to gunpipe().
193
194   lunpipe() will return Z_OK on success, Z_BUF_ERROR for an unexpected end of
195   file, read error, or write error (a write error indicated by strm->next_in
196   not equal to Z_NULL), or Z_DATA_ERROR for invalid input.
197 */
198local int lunpipe(unsigned have, unsigned char *next, struct ind *indp,
199                  int outfile, z_stream *strm)
200{
201    int last;                   /* last byte read by NEXT(), or -1 if EOF */
202    int chunk;                  /* bytes left in current chunk */
203    int left;                   /* bits left in rem */
204    unsigned rem;               /* unused bits from input */
205    int bits;                   /* current bits per code */
206    unsigned code;              /* code, table traversal index */
207    unsigned mask;              /* mask for current bits codes */
208    int max;                    /* maximum bits per code for this stream */
209    int flags;                  /* compress flags, then block compress flag */
210    unsigned end;               /* last valid entry in prefix/suffix tables */
211    unsigned temp;              /* current code */
212    unsigned prev;              /* previous code */
213    unsigned final;             /* last character written for previous code */
214    unsigned stack;             /* next position for reversed string */
215    unsigned outcnt;            /* bytes in output buffer */
216    struct outd outd;           /* output structure */
217
218    /* set up output */
219    outd.outfile = outfile;
220    outd.check = 0;
221
222    /* process remainder of compress header -- a flags byte */
223    flags = NEXT();
224    if (last == -1)
225        return Z_BUF_ERROR;
226    if (flags & 0x60) {
227        strm->msg = (char *)"unknown lzw flags set";
228        return Z_DATA_ERROR;
229    }
230    max = flags & 0x1f;
231    if (max < 9 || max > 16) {
232        strm->msg = (char *)"lzw bits out of range";
233        return Z_DATA_ERROR;
234    }
235    if (max == 9)                           /* 9 doesn't really mean 9 */
236        max = 10;
237    flags &= 0x80;                          /* true if block compress */
238
239    /* clear table */
240    bits = 9;
241    mask = 0x1ff;
242    end = flags ? 256 : 255;
243
244    /* set up: get first 9-bit code, which is the first decompressed byte, but
245       don't create a table entry until the next code */
246    if (NEXT() == -1)                       /* no compressed data is ok */
247        return Z_OK;
248    final = prev = (unsigned)last;          /* low 8 bits of code */
249    if (NEXT() == -1)                       /* missing a bit */
250        return Z_BUF_ERROR;
251    if (last & 1) {                         /* code must be < 256 */
252        strm->msg = (char *)"invalid lzw code";
253        return Z_DATA_ERROR;
254    }
255    rem = (unsigned)last >> 1;              /* remaining 7 bits */
256    left = 7;
257    chunk = bits - 2;                       /* 7 bytes left in this chunk */
258    outbuf[0] = (unsigned char)final;       /* write first decompressed byte */
259    outcnt = 1;
260
261    /* decode codes */
262    stack = 0;
263    for (;;) {
264        /* if the table will be full after this, increment the code size */
265        if (end >= mask && bits < max) {
266            FLUSHCODE();
267            bits++;
268            mask <<= 1;
269            mask++;
270        }
271
272        /* get a code of length bits */
273        if (chunk == 0)                     /* decrement chunk modulo bits */
274            chunk = bits;
275        code = rem;                         /* low bits of code */
276        if (NEXT() == -1) {                 /* EOF is end of compressed data */
277            /* write remaining buffered output */
278            if (outcnt && out(&outd, outbuf, outcnt)) {
279                strm->next_in = outbuf;     /* signal write error */
280                return Z_BUF_ERROR;
281            }
282            return Z_OK;
283        }
284        code += (unsigned)last << left;     /* middle (or high) bits of code */
285        left += 8;
286        chunk--;
287        if (bits > left) {                  /* need more bits */
288            if (NEXT() == -1)               /* can't end in middle of code */
289                return Z_BUF_ERROR;
290            code += (unsigned)last << left; /* high bits of code */
291            left += 8;
292            chunk--;
293        }
294        code &= mask;                       /* mask to current code length */
295        left -= bits;                       /* number of unused bits */
296        rem = (unsigned)last >> (8 - left); /* unused bits from last byte */
297
298        /* process clear code (256) */
299        if (code == 256 && flags) {
300            FLUSHCODE();
301            bits = 9;                       /* initialize bits and mask */
302            mask = 0x1ff;
303            end = 255;                      /* empty table */
304            continue;                       /* get next code */
305        }
306
307        /* special code to reuse last match */
308        temp = code;                        /* save the current code */
309        if (code > end) {
310            /* Be picky on the allowed code here, and make sure that the code
311               we drop through (prev) will be a valid index so that random
312               input does not cause an exception.  The code != end + 1 check is
313               empirically derived, and not checked in the original uncompress
314               code.  If this ever causes a problem, that check could be safely
315               removed.  Leaving this check in greatly improves gun's ability
316               to detect random or corrupted input after a compress header.
317               In any case, the prev > end check must be retained. */
318            if (code != end + 1 || prev > end) {
319                strm->msg = (char *)"invalid lzw code";
320                return Z_DATA_ERROR;
321            }
322            match[stack++] = (unsigned char)final;
323            code = prev;
324        }
325
326        /* walk through linked list to generate output in reverse order */
327        while (code >= 256) {
328            match[stack++] = suffix[code];
329            code = prefix[code];
330        }
331        match[stack++] = (unsigned char)code;
332        final = code;
333
334        /* link new table entry */
335        if (end < mask) {
336            end++;
337            prefix[end] = (unsigned short)prev;
338            suffix[end] = (unsigned char)final;
339        }
340
341        /* set previous code for next iteration */
342        prev = temp;
343
344        /* write output in forward order */
345        while (stack > SIZE - outcnt) {
346            while (outcnt < SIZE)
347                outbuf[outcnt++] = match[--stack];
348            if (out(&outd, outbuf, outcnt)) {
349                strm->next_in = outbuf; /* signal write error */
350                return Z_BUF_ERROR;
351            }
352            outcnt = 0;
353        }
354        do {
355            outbuf[outcnt++] = match[--stack];
356        } while (stack);
357
358        /* loop for next code with final and prev as the last match, rem and
359           left provide the first 0..7 bits of the next code, end is the last
360           valid table entry */
361    }
362}
363
364/* Decompress a gzip file from infile to outfile.  strm is assumed to have been
365   successfully initialized with inflateBackInit().  The input file may consist
366   of a series of gzip streams, in which case all of them will be decompressed
367   to the output file.  If outfile is -1, then the gzip stream(s) integrity is
368   checked and nothing is written.
369
370   The return value is a zlib error code: Z_MEM_ERROR if out of memory,
371   Z_DATA_ERROR if the header or the compressed data is invalid, or if the
372   trailer CRC-32 check or length doesn't match, Z_BUF_ERROR if the input ends
373   prematurely or a write error occurs, or Z_ERRNO if junk (not a another gzip
374   stream) follows a valid gzip stream.
375 */
376local int gunpipe(z_stream *strm, int infile, int outfile)
377{
378    int ret, first, last;
379    unsigned have, flags, len;
380    unsigned char *next;
381    struct ind ind, *indp;
382    struct outd outd;
383
384    /* setup input buffer */
385    ind.infile = infile;
386    ind.inbuf = inbuf;
387    indp = &ind;
388
389    /* decompress concatenated gzip streams */
390    have = 0;                               /* no input data read in yet */
391    first = 1;                              /* looking for first gzip header */
392    strm->next_in = Z_NULL;                 /* so Z_BUF_ERROR means EOF */
393    for (;;) {
394        /* look for the two magic header bytes for a gzip stream */
395        if (NEXT() == -1) {
396            ret = Z_OK;
397            break;                          /* empty gzip stream is ok */
398        }
399        if (last != 31 || (NEXT() != 139 && last != 157)) {
400            strm->msg = (char *)"incorrect header check";
401            ret = first ? Z_DATA_ERROR : Z_ERRNO;
402            break;                          /* not a gzip or compress header */
403        }
404        first = 0;                          /* next non-header is junk */
405
406        /* process a compress (LZW) file -- can't be concatenated after this */
407        if (last == 157) {
408            ret = lunpipe(have, next, indp, outfile, strm);
409            break;
410        }
411
412        /* process remainder of gzip header */
413        ret = Z_BUF_ERROR;
414        if (NEXT() != 8) {                  /* only deflate method allowed */
415            if (last == -1) break;
416            strm->msg = (char *)"unknown compression method";
417            ret = Z_DATA_ERROR;
418            break;
419        }
420        flags = NEXT();                     /* header flags */
421        NEXT();                             /* discard mod time, xflgs, os */
422        NEXT();
423        NEXT();
424        NEXT();
425        NEXT();
426        NEXT();
427        if (last == -1) break;
428        if (flags & 0xe0) {
429            strm->msg = (char *)"unknown header flags set";
430            ret = Z_DATA_ERROR;
431            break;
432        }
433        if (flags & 4) {                    /* extra field */
434            len = NEXT();
435            len += (unsigned)(NEXT()) << 8;
436            if (last == -1) break;
437            while (len > have) {
438                len -= have;
439                have = 0;
440                if (NEXT() == -1) break;
441                len--;
442            }
443            if (last == -1) break;
444            have -= len;
445            next += len;
446        }
447        if (flags & 8)                      /* file name */
448            while (NEXT() != 0 && last != -1)
449                ;
450        if (flags & 16)                     /* comment */
451            while (NEXT() != 0 && last != -1)
452                ;
453        if (flags & 2) {                    /* header crc */
454            NEXT();
455            NEXT();
456        }
457        if (last == -1) break;
458
459        /* set up output */
460        outd.outfile = outfile;
461        outd.check = 1;
462        outd.crc = crc32(0L, Z_NULL, 0);
463        outd.total = 0;
464
465        /* decompress data to output */
466        strm->next_in = next;
467        strm->avail_in = have;
468        ret = inflateBack(strm, in, indp, out, &outd);
469        if (ret != Z_STREAM_END) break;
470        next = strm->next_in;
471        have = strm->avail_in;
472        strm->next_in = Z_NULL;             /* so Z_BUF_ERROR means EOF */
473
474        /* check trailer */
475        ret = Z_BUF_ERROR;
476        if (NEXT() != (outd.crc & 0xff) ||
477            NEXT() != ((outd.crc >> 8) & 0xff) ||
478            NEXT() != ((outd.crc >> 16) & 0xff) ||
479            NEXT() != ((outd.crc >> 24) & 0xff)) {
480            /* crc error */
481            if (last != -1) {
482                strm->msg = (char *)"incorrect data check";
483                ret = Z_DATA_ERROR;
484            }
485            break;
486        }
487        if (NEXT() != (outd.total & 0xff) ||
488            NEXT() != ((outd.total >> 8) & 0xff) ||
489            NEXT() != ((outd.total >> 16) & 0xff) ||
490            NEXT() != ((outd.total >> 24) & 0xff)) {
491            /* length error */
492            if (last != -1) {
493                strm->msg = (char *)"incorrect length check";
494                ret = Z_DATA_ERROR;
495            }
496            break;
497        }
498
499        /* go back and look for another gzip stream */
500    }
501
502    /* clean up and return */
503    return ret;
504}
505
506/* Copy file attributes, from -> to, as best we can.  This is best effort, so
507   no errors are reported.  The mode bits, including suid, sgid, and the sticky
508   bit are copied (if allowed), the owner's user id and group id are copied
509   (again if allowed), and the access and modify times are copied. */
510local void copymeta(char *from, char *to)
511{
512    struct stat was;
513    struct utimbuf when;
514
515    /* get all of from's Unix meta data, return if not a regular file */
516    if (stat(from, &was) != 0 || (was.st_mode & S_IFMT) != S_IFREG)
517        return;
518
519    /* set to's mode bits, ignore errors */
520    (void)chmod(to, was.st_mode & 07777);
521
522    /* copy owner's user and group, ignore errors */
523    (void)chown(to, was.st_uid, was.st_gid);
524
525    /* copy access and modify times, ignore errors */
526    when.actime = was.st_atime;
527    when.modtime = was.st_mtime;
528    (void)utime(to, &when);
529}
530
531/* Decompress the file inname to the file outnname, of if test is true, just
532   decompress without writing and check the gzip trailer for integrity.  If
533   inname is NULL or an empty string, read from stdin.  If outname is NULL or
534   an empty string, write to stdout.  strm is a pre-initialized inflateBack
535   structure.  When appropriate, copy the file attributes from inname to
536   outname.
537
538   gunzip() returns 1 if there is an out-of-memory error or an unexpected
539   return code from gunpipe().  Otherwise it returns 0.
540 */
541local int gunzip(z_stream *strm, char *inname, char *outname, int test)
542{
543    int ret;
544    int infile, outfile;
545
546    /* open files */
547    if (inname == NULL || *inname == 0) {
548        inname = "-";
549        infile = 0;     /* stdin */
550    }
551    else {
552        infile = open(inname, O_RDONLY, 0);
553        if (infile == -1) {
554            fprintf(stderr, "gun cannot open %s\n", inname);
555            return 0;
556        }
557    }
558    if (test)
559        outfile = -1;
560    else if (outname == NULL || *outname == 0) {
561        outname = "-";
562        outfile = 1;    /* stdout */
563    }
564    else {
565        outfile = open(outname, O_CREAT | O_TRUNC | O_WRONLY, 0666);
566        if (outfile == -1) {
567            close(infile);
568            fprintf(stderr, "gun cannot create %s\n", outname);
569            return 0;
570        }
571    }
572    errno = 0;
573
574    /* decompress */
575    ret = gunpipe(strm, infile, outfile);
576    if (outfile > 2) close(outfile);
577    if (infile > 2) close(infile);
578
579    /* interpret result */
580    switch (ret) {
581    case Z_OK:
582    case Z_ERRNO:
583        if (infile > 2 && outfile > 2) {
584            copymeta(inname, outname);          /* copy attributes */
585            unlink(inname);
586        }
587        if (ret == Z_ERRNO)
588            fprintf(stderr, "gun warning: trailing garbage ignored in %s\n",
589                    inname);
590        break;
591    case Z_DATA_ERROR:
592        if (outfile > 2) unlink(outname);
593        fprintf(stderr, "gun data error on %s: %s\n", inname, strm->msg);
594        break;
595    case Z_MEM_ERROR:
596        if (outfile > 2) unlink(outname);
597        fprintf(stderr, "gun out of memory error--aborting\n");
598        return 1;
599    case Z_BUF_ERROR:
600        if (outfile > 2) unlink(outname);
601        if (strm->next_in != Z_NULL) {
602            fprintf(stderr, "gun write error on %s: %s\n",
603                    outname, strerror(errno));
604        }
605        else if (errno) {
606            fprintf(stderr, "gun read error on %s: %s\n",
607                    inname, strerror(errno));
608        }
609        else {
610            fprintf(stderr, "gun unexpected end of file on %s\n",
611                    inname);
612        }
613        break;
614    default:
615        if (outfile > 2) unlink(outname);
616        fprintf(stderr, "gun internal error--aborting\n");
617        return 1;
618    }
619    return 0;
620}
621
622/* Process the gun command line arguments.  See the command syntax near the
623   beginning of this source file. */
624int main(int argc, char **argv)
625{
626    int ret, len, test;
627    char *outname;
628    unsigned char *window;
629    z_stream strm;
630
631    /* initialize inflateBack state for repeated use */
632    window = match;                         /* reuse LZW match buffer */
633    strm.zalloc = Z_NULL;
634    strm.zfree = Z_NULL;
635    strm.opaque = Z_NULL;
636    ret = inflateBackInit(&strm, 15, window);
637    if (ret != Z_OK) {
638        fprintf(stderr, "gun out of memory error--aborting\n");
639        return 1;
640    }
641
642    /* decompress each file to the same name with the suffix removed */
643    argc--;
644    argv++;
645    test = 0;
646    if (argc && strcmp(*argv, "-h") == 0) {
647        fprintf(stderr, "gun 1.3 (12 Jun 2005)\n");
648        fprintf(stderr, "Copyright (c) 2005 Mark Adler\n");
649        fprintf(stderr, "usage: gun [-t] [file1.gz [file2.Z ...]]\n");
650        return 0;
651    }
652    if (argc && strcmp(*argv, "-t") == 0) {
653        test = 1;
654        argc--;
655        argv++;
656    }
657    if (argc)
658        do {
659            if (test)
660                outname = NULL;
661            else {
662                len = (int)strlen(*argv);
663                if (strcmp(*argv + len - 3, ".gz") == 0 ||
664                    strcmp(*argv + len - 3, "-gz") == 0)
665                    len -= 3;
666                else if (strcmp(*argv + len - 2, ".z") == 0 ||
667                    strcmp(*argv + len - 2, "-z") == 0 ||
668                    strcmp(*argv + len - 2, "_z") == 0 ||
669                    strcmp(*argv + len - 2, ".Z") == 0)
670                    len -= 2;
671                else {
672                    fprintf(stderr, "gun error: no gz type on %s--skipping\n",
673                            *argv);
674                    continue;
675                }
676                outname = malloc(len + 1);
677                if (outname == NULL) {
678                    fprintf(stderr, "gun out of memory error--aborting\n");
679                    ret = 1;
680                    break;
681                }
682                memcpy(outname, *argv, len);
683                outname[len] = 0;
684            }
685            ret = gunzip(&strm, *argv, outname, test);
686            if (outname != NULL) free(outname);
687            if (ret) break;
688        } while (argv++, --argc);
689    else
690        ret = gunzip(&strm, NULL, NULL, test);
691
692    /* clean up */
693    inflateBackEnd(&strm);
694    return ret;
695}
696