gzread.c revision 205194
/* gzread.c -- zlib functions for reading gzip files
 * Copyright (C) 2004, 2005, 2010 Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
 */
5
6#include "gzguts.h"
7
8/* Local functions */
9local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
10local int gz_avail OF((gz_statep));
11local int gz_next4 OF((gz_statep, unsigned long *));
12local int gz_head OF((gz_statep));
13local int gz_decomp OF((gz_statep));
14local int gz_make OF((gz_statep));
15local int gz_skip OF((gz_statep, z_off64_t));
16
17/* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
18   state->fd, and update state->eof, state->err, and state->msg as appropriate.
19   This function needs to loop on read(), since read() is not guaranteed to
20   read the number of bytes requested, depending on the type of descriptor. */
21local int gz_load(state, buf, len, have)
22    gz_statep state;
23    unsigned char *buf;
24    unsigned len;
25    unsigned *have;
26{
27    int ret;
28
29    *have = 0;
30    do {
31        ret = read(state->fd, buf + *have, len - *have);
32        if (ret <= 0)
33            break;
34        *have += ret;
35    } while (*have < len);
36    if (ret < 0) {
37        gz_error(state, Z_ERRNO, zstrerror());
38        return -1;
39    }
40    if (ret == 0)
41        state->eof = 1;
42    return 0;
43}
44
45/* Load up input buffer and set eof flag if last data loaded -- return -1 on
46   error, 0 otherwise.  Note that the eof flag is set when the end of the input
47   file is reached, even though there may be unused data in the buffer.  Once
48   that data has been used, no more attempts will be made to read the file.
49   gz_avail() assumes that strm->avail_in == 0. */
50local int gz_avail(state)
51    gz_statep state;
52{
53    z_streamp strm = &(state->strm);
54
55    if (state->err != Z_OK)
56        return -1;
57    if (state->eof == 0) {
58        if (gz_load(state, state->in, state->size, &(strm->avail_in)) == -1)
59            return -1;
60        strm->next_in = state->in;
61    }
62    return 0;
63}
64
/* Get the next input byte as a non-negative int, or -1 if there is none
   (end of input or error).  The buffer is refilled with gz_avail() when it
   is empty.  Expects `state` and `strm` to be in scope at the point of use. */
#define NEXT() \
    (((strm->avail_in != 0 || gz_avail(state) != -1) && \
      strm->avail_in != 0) ? \
        (strm->avail_in--, *(strm->next_in)++) : -1)
69
70/* Get a four-byte little-endian integer and return 0 on success and the value
71   in *ret.  Otherwise -1 is returned and *ret is not modified. */
72local int gz_next4(state, ret)
73    gz_statep state;
74    unsigned long *ret;
75{
76    int ch;
77    unsigned long val;
78    z_streamp strm = &(state->strm);
79
80    val = NEXT();
81    val += (unsigned)NEXT() << 8;
82    val += (unsigned long)NEXT() << 16;
83    ch = NEXT();
84    if (ch == -1)
85        return -1;
86    val += (unsigned long)ch << 24;
87    *ret = val;
88    return 0;
89}
90
91/* Look for gzip header, set up for inflate or copy.  state->have must be zero.
92   If this is the first time in, allocate required memory.  state->how will be
93   left unchanged if there is no more input data available, will be set to COPY
94   if there is no gzip header and direct copying will be performed, or it will
95   be set to GZIP for decompression, and the gzip header will be skipped so
96   that the next available input data is the raw deflate stream.  If direct
97   copying, then leftover input data from the input buffer will be copied to
98   the output buffer.  In that case, all further file reads will be directly to
99   either the output buffer or a user buffer.  If decompressing, the inflate
100   state and the check value will be initialized.  gz_head() will return 0 on
101   success or -1 on failure.  Failures may include read errors or gzip header
102   errors.  */
103local int gz_head(state)
104    gz_statep state;
105{
106    z_streamp strm = &(state->strm);
107    int flags;
108    unsigned len;
109
110    /* allocate read buffers and inflate memory */
111    if (state->size == 0) {
112        /* allocate buffers */
113        state->in = malloc(state->want);
114        state->out = malloc(state->want << 1);
115        if (state->in == NULL || state->out == NULL) {
116            if (state->out != NULL)
117                free(state->out);
118            if (state->in != NULL)
119                free(state->in);
120            gz_error(state, Z_MEM_ERROR, "out of memory");
121            return -1;
122        }
123        state->size = state->want;
124
125        /* allocate inflate memory */
126        state->strm.zalloc = Z_NULL;
127        state->strm.zfree = Z_NULL;
128        state->strm.opaque = Z_NULL;
129        state->strm.avail_in = 0;
130        state->strm.next_in = Z_NULL;
131        if (inflateInit2(&(state->strm), -15) != Z_OK) {    /* raw inflate */
132            free(state->out);
133            free(state->in);
134            state->size = 0;
135            gz_error(state, Z_MEM_ERROR, "out of memory");
136            return -1;
137        }
138    }
139
140    /* get some data in the input buffer */
141    if (strm->avail_in == 0) {
142        if (gz_avail(state) == -1)
143            return -1;
144        if (strm->avail_in == 0)
145            return 0;
146    }
147
148    /* look for the gzip magic header bytes 31 and 139 */
149    if (strm->next_in[0] == 31) {
150        strm->avail_in--;
151        strm->next_in++;
152        if (strm->avail_in == 0 && gz_avail(state) == -1)
153            return -1;
154        if (strm->avail_in && strm->next_in[0] == 139) {
155            /* we have a gzip header, woo hoo! */
156            strm->avail_in--;
157            strm->next_in++;
158
159            /* skip rest of header */
160            if (NEXT() != 8) {      /* compression method */
161                gz_error(state, Z_DATA_ERROR, "unknown compression method");
162                return -1;
163            }
164            flags = NEXT();
165            if (flags & 0xe0) {     /* reserved flag bits */
166                gz_error(state, Z_DATA_ERROR, "unknown header flags set");
167                return -1;
168            }
169            NEXT();                 /* modification time */
170            NEXT();
171            NEXT();
172            NEXT();
173            NEXT();                 /* extra flags */
174            NEXT();                 /* operating system */
175            if (flags & 4) {        /* extra field */
176                len = (unsigned)NEXT();
177                len += (unsigned)NEXT() << 8;
178                while (len--)
179                    if (NEXT() < 0)
180                        break;
181            }
182            if (flags & 8)          /* file name */
183                while (NEXT() > 0)
184                    ;
185            if (flags & 16)         /* comment */
186                while (NEXT() > 0)
187                    ;
188            if (flags & 2) {        /* header crc */
189                NEXT();
190                NEXT();
191            }
192            /* an unexpected end of file is not checked for here -- it will be
193               noticed on the first request for uncompressed data */
194
195            /* set up for decompression */
196            inflateReset(strm);
197            strm->adler = crc32(0L, Z_NULL, 0);
198            state->how = GZIP;
199            state->direct = 0;
200            return 0;
201        }
202        else {
203            /* not a gzip file -- save first byte (31) and fall to raw i/o */
204            state->out[0] = 31;
205            state->have = 1;
206        }
207    }
208
209    /* doing raw i/o, save start of raw data for seeking, copy any leftover
210       input to output -- this assumes that the output buffer is larger than
211       the input buffer, which also assures space for gzungetc() */
212    state->raw = state->pos;
213    state->next = state->out;
214    if (strm->avail_in) {
215        memcpy(state->next + state->have, strm->next_in, strm->avail_in);
216        state->have += strm->avail_in;
217        strm->avail_in = 0;
218    }
219    state->how = COPY;
220    state->direct = 1;
221    return 0;
222}
223
224/* Decompress from input to the provided next_out and avail_out in the state.
225   If the end of the compressed data is reached, then verify the gzip trailer
226   check value and length (modulo 2^32).  state->have and state->next are set
227   to point to the just decompressed data, and the crc is updated.  If the
228   trailer is verified, state->how is reset to LOOK to look for the next gzip
229   stream or raw data, once state->have is depleted.  Returns 0 on success, -1
230   on failure.  Failures may include invalid compressed data or a failed gzip
231   trailer verification. */
232local int gz_decomp(state)
233    gz_statep state;
234{
235    int ret;
236    unsigned had;
237    unsigned long crc, len;
238    z_streamp strm = &(state->strm);
239
240    /* fill output buffer up to end of deflate stream */
241    had = strm->avail_out;
242    do {
243        /* get more input for inflate() */
244        if (strm->avail_in == 0 && gz_avail(state) == -1)
245            return -1;
246        if (strm->avail_in == 0) {
247            gz_error(state, Z_DATA_ERROR, "unexpected end of file");
248            return -1;
249        }
250
251        /* decompress and handle errors */
252        ret = inflate(strm, Z_NO_FLUSH);
253        if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
254            gz_error(state, Z_STREAM_ERROR,
255                      "internal error: inflate stream corrupt");
256            return -1;
257        }
258        if (ret == Z_MEM_ERROR) {
259            gz_error(state, Z_MEM_ERROR, "out of memory");
260            return -1;
261        }
262        if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
263            gz_error(state, Z_DATA_ERROR,
264                      strm->msg == NULL ? "compressed data error" : strm->msg);
265            return -1;
266        }
267    } while (strm->avail_out && ret != Z_STREAM_END);
268
269    /* update available output and crc check value */
270    state->have = had - strm->avail_out;
271    state->next = strm->next_out - state->have;
272    strm->adler = crc32(strm->adler, state->next, state->have);
273
274    /* check gzip trailer if at end of deflate stream */
275    if (ret == Z_STREAM_END) {
276        if (gz_next4(state, &crc) == -1 || gz_next4(state, &len) == -1) {
277            gz_error(state, Z_DATA_ERROR, "unexpected end of file");
278            return -1;
279        }
280        if (crc != strm->adler) {
281            gz_error(state, Z_DATA_ERROR, "incorrect data check");
282            return -1;
283        }
284        if (len != (strm->total_out & 0xffffffffL)) {
285            gz_error(state, Z_DATA_ERROR, "incorrect length check");
286            return -1;
287        }
288        state->how = LOOK;      /* ready for next stream, once have is 0 (leave
289                                   state->direct unchanged to remember how) */
290    }
291
292    /* good decompression */
293    return 0;
294}
295
296/* Make data and put in the output buffer.  Assumes that state->have == 0.
297   Data is either copied from the input file or decompressed from the input
298   file depending on state->how.  If state->how is LOOK, then a gzip header is
299   looked for (and skipped if found) to determine wither to copy or decompress.
300   Returns -1 on error, otherwise 0.  gz_make() will leave state->have as COPY
301   or GZIP unless the end of the input file has been reached and all data has
302   been processed.  */
303local int gz_make(state)
304    gz_statep state;
305{
306    z_streamp strm = &(state->strm);
307
308    if (state->how == LOOK) {           /* look for gzip header */
309        if (gz_head(state) == -1)
310            return -1;
311        if (state->have)                /* got some data from gz_head() */
312            return 0;
313    }
314    if (state->how == COPY) {           /* straight copy */
315        if (gz_load(state, state->out, state->size << 1, &(state->have)) == -1)
316            return -1;
317        state->next = state->out;
318    }
319    else if (state->how == GZIP) {      /* decompress */
320        strm->avail_out = state->size << 1;
321        strm->next_out = state->out;
322        if (gz_decomp(state) == -1)
323            return -1;
324    }
325    return 0;
326}
327
328/* Skip len uncompressed bytes of output.  Return -1 on error, 0 on success. */
329local int gz_skip(state, len)
330    gz_statep state;
331    z_off64_t len;
332{
333    unsigned n;
334
335    /* skip over len bytes or reach end-of-file, whichever comes first */
336    while (len)
337        /* skip over whatever is in output buffer */
338        if (state->have) {
339            n = GT_OFF(state->have) || (z_off64_t)state->have > len ?
340                (unsigned)len : state->have;
341            state->have -= n;
342            state->next += n;
343            state->pos += n;
344            len -= n;
345        }
346
347        /* output buffer empty -- return if we're at the end of the input */
348        else if (state->eof && state->strm.avail_in == 0)
349            break;
350
351        /* need more data to skip -- load up output buffer */
352        else {
353            /* get more output, looking for header if required */
354            if (gz_make(state) == -1)
355                return -1;
356        }
357    return 0;
358}
359
360/* -- see zlib.h -- */
361int ZEXPORT gzread(file, buf, len)
362    gzFile file;
363    voidp buf;
364    unsigned len;
365{
366    unsigned got, n;
367    gz_statep state;
368    z_streamp strm;
369
370    /* get internal structure */
371    if (file == NULL)
372        return -1;
373    state = (gz_statep)file;
374    strm = &(state->strm);
375
376    /* check that we're reading and that there's no error */
377    if (state->mode != GZ_READ || state->err != Z_OK)
378        return -1;
379
380    /* since an int is returned, make sure len fits in one, otherwise return
381       with an error (this avoids the flaw in the interface) */
382    if ((int)len < 0) {
383        gz_error(state, Z_BUF_ERROR, "requested length does not fit in int");
384        return -1;
385    }
386
387    /* if len is zero, avoid unnecessary operations */
388    if (len == 0)
389        return 0;
390
391    /* process a skip request */
392    if (state->seek) {
393        state->seek = 0;
394        if (gz_skip(state, state->skip) == -1)
395            return -1;
396    }
397
398    /* get len bytes to buf, or less than len if at the end */
399    got = 0;
400    do {
401        /* first just try copying data from the output buffer */
402        if (state->have) {
403            n = state->have > len ? len : state->have;
404            memcpy(buf, state->next, n);
405            state->next += n;
406            state->have -= n;
407        }
408
409        /* output buffer empty -- return if we're at the end of the input */
410        else if (state->eof && strm->avail_in == 0)
411            break;
412
413        /* need output data -- for small len or new stream load up our output
414           buffer */
415        else if (state->how == LOOK || len < (state->size << 1)) {
416            /* get more output, looking for header if required */
417            if (gz_make(state) == -1)
418                return -1;
419            continue;       /* no progress yet -- go back to memcpy() above */
420            /* the copy above assures that we will leave with space in the
421               output buffer, allowing at least one gzungetc() to succeed */
422        }
423
424        /* large len -- read directly into user buffer */
425        else if (state->how == COPY) {      /* read directly */
426            if (gz_load(state, buf, len, &n) == -1)
427                return -1;
428        }
429
430        /* large len -- decompress directly into user buffer */
431        else {  /* state->how == GZIP */
432            strm->avail_out = len;
433            strm->next_out = buf;
434            if (gz_decomp(state) == -1)
435                return -1;
436            n = state->have;
437            state->have = 0;
438        }
439
440        /* update progress */
441        len -= n;
442        buf = (char *)buf + n;
443        got += n;
444        state->pos += n;
445    } while (len);
446
447    /* return number of bytes read into user buffer (will fit in int) */
448    return (int)got;
449}
450
451/* -- see zlib.h -- */
452int ZEXPORT gzgetc(file)
453    gzFile file;
454{
455    int ret;
456    unsigned char buf[1];
457    gz_statep state;
458
459    /* get internal structure */
460    if (file == NULL)
461        return -1;
462    state = (gz_statep)file;
463
464    /* check that we're reading and that there's no error */
465    if (state->mode != GZ_READ || state->err != Z_OK)
466        return -1;
467
468    /* try output buffer (no need to check for skip request) */
469    if (state->have) {
470        state->have--;
471        state->pos++;
472        return *(state->next)++;
473    }
474
475    /* nothing there -- try gzread() */
476    ret = gzread(file, buf, 1);
477    return ret < 1 ? -1 : buf[0];
478}
479
480/* -- see zlib.h -- */
481int ZEXPORT gzungetc(c, file)
482    int c;
483    gzFile file;
484{
485    gz_statep state;
486
487    /* get internal structure */
488    if (file == NULL)
489        return -1;
490    state = (gz_statep)file;
491
492    /* check that we're reading and that there's no error */
493    if (state->mode != GZ_READ || state->err != Z_OK)
494        return -1;
495
496    /* process a skip request */
497    if (state->seek) {
498        state->seek = 0;
499        if (gz_skip(state, state->skip) == -1)
500            return -1;
501    }
502
503    /* can't push EOF */
504    if (c < 0)
505        return -1;
506
507    /* if output buffer empty, put byte at end (allows more pushing) */
508    if (state->have == 0) {
509        state->have = 1;
510        state->next = state->out + (state->size << 1) - 1;
511        state->next[0] = c;
512        state->pos--;
513        return c;
514    }
515
516    /* if no room, give up (must have already done a gzungetc()) */
517    if (state->have == (state->size << 1)) {
518        gz_error(state, Z_BUF_ERROR, "out of room to push characters");
519        return -1;
520    }
521
522    /* slide output data if needed and insert byte before existing data */
523    if (state->next == state->out) {
524        unsigned char *src = state->out + state->have;
525        unsigned char *dest = state->out + (state->size << 1);
526        while (src > state->out)
527            *--dest = *--src;
528        state->next = dest;
529    }
530    state->have++;
531    state->next--;
532    state->next[0] = c;
533    state->pos--;
534    return c;
535}
536
537/* -- see zlib.h -- */
538char * ZEXPORT gzgets(file, buf, len)
539    gzFile file;
540    char *buf;
541    int len;
542{
543    unsigned left, n;
544    char *str;
545    unsigned char *eol;
546    gz_statep state;
547
548    /* check parameters and get internal structure */
549    if (file == NULL || buf == NULL || len < 1)
550        return NULL;
551    state = (gz_statep)file;
552
553    /* check that we're reading and that there's no error */
554    if (state->mode != GZ_READ || state->err != Z_OK)
555        return NULL;
556
557    /* process a skip request */
558    if (state->seek) {
559        state->seek = 0;
560        if (gz_skip(state, state->skip) == -1)
561            return NULL;
562    }
563
564    /* copy output bytes up to new line or len - 1, whichever comes first --
565       append a terminating zero to the string (we don't check for a zero in
566       the contents, let the user worry about that) */
567    str = buf;
568    left = (unsigned)len - 1;
569    if (left) do {
570        /* assure that something is in the output buffer */
571        if (state->have == 0) {
572            if (gz_make(state) == -1)
573                return NULL;            /* error */
574            if (state->have == 0) {     /* end of file */
575                if (buf == str)         /* got bupkus */
576                    return NULL;
577                break;                  /* got something -- return it */
578            }
579        }
580
581        /* look for end-of-line in current output buffer */
582        n = state->have > left ? left : state->have;
583        eol = memchr(state->next, '\n', n);
584        if (eol != NULL)
585            n = (unsigned)(eol - state->next) + 1;
586
587        /* copy through end-of-line, or remainder if not found */
588        memcpy(buf, state->next, n);
589        state->have -= n;
590        state->next += n;
591        state->pos += n;
592        left -= n;
593        buf += n;
594    } while (left && eol == NULL);
595
596    /* found end-of-line or out of space -- terminate string and return it */
597    buf[0] = 0;
598    return str;
599}
600
601/* -- see zlib.h -- */
602int ZEXPORT gzdirect(file)
603    gzFile file;
604{
605    gz_statep state;
606
607    /* get internal structure */
608    if (file == NULL)
609        return 0;
610    state = (gz_statep)file;
611
612    /* check that we're reading */
613    if (state->mode != GZ_READ)
614        return 0;
615
616    /* if the state is not known, but we can find out, then do so (this is
617       mainly for right after a gzopen() or gzdopen()) */
618    if (state->how == LOOK && state->have == 0)
619        (void)gz_head(state);
620
621    /* return 1 if reading direct, 0 if decompressing a gzip stream */
622    return state->direct;
623}
624
625/* -- see zlib.h -- */
626int ZEXPORT gzclose_r(file)
627    gzFile file;
628{
629    int ret;
630    gz_statep state;
631
632    /* get internal structure */
633    if (file == NULL)
634        return Z_STREAM_ERROR;
635    state = (gz_statep)file;
636
637    /* check that we're reading */
638    if (state->mode != GZ_READ)
639        return Z_STREAM_ERROR;
640
641    /* free memory and close file */
642    if (state->size) {
643        inflateEnd(&(state->strm));
644        free(state->out);
645        free(state->in);
646    }
647    gz_error(state, Z_OK, NULL);
648    free(state->path);
649    ret = close(state->fd);
650    free(state);
651    return ret ? Z_ERRNO : Z_OK;
652}
653