/* gzread.c revision 237248 */
/* gzread.c -- zlib functions for reading gzip files
 * Copyright (C) 2004, 2005, 2010, 2011, 2012 Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
 */
5
6#include "gzguts.h"
7
/* Local functions -- forward declarations of the file-private helpers that
   are defined further down in this file */
/* read() wrapper: fill a buffer from state->fd, reporting the byte count */
local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
/* refill the inflate input buffer, keeping any unconsumed input */
local int gz_avail OF((gz_statep));
/* look for a gzip header and choose between inflating and copying */
local int gz_look OF((gz_statep));
/* inflate into the output area currently set up in the stream */
local int gz_decomp OF((gz_statep));
/* produce more output according to state->how */
local int gz_fetch OF((gz_statep));
/* discard the given number of uncompressed output bytes */
local int gz_skip OF((gz_statep, z_off64_t));
15
16/* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
17   state->fd, and update state->eof, state->err, and state->msg as appropriate.
18   This function needs to loop on read(), since read() is not guaranteed to
19   read the number of bytes requested, depending on the type of descriptor. */
20local int gz_load(state, buf, len, have)
21    gz_statep state;
22    unsigned char *buf;
23    unsigned len;
24    unsigned *have;
25{
26    int ret;
27
28    *have = 0;
29    do {
30        ret = read(state->fd, buf + *have, len - *have);
31        if (ret <= 0)
32            break;
33        *have += ret;
34    } while (*have < len);
35    if (ret < 0) {
36        gz_error(state, Z_ERRNO, zstrerror());
37        return -1;
38    }
39    if (ret == 0)
40        state->eof = 1;
41    return 0;
42}
43
44/* Load up input buffer and set eof flag if last data loaded -- return -1 on
45   error, 0 otherwise.  Note that the eof flag is set when the end of the input
46   file is reached, even though there may be unused data in the buffer.  Once
47   that data has been used, no more attempts will be made to read the file.
48   If strm->avail_in != 0, then the current data is moved to the beginning of
49   the input buffer, and then the remainder of the buffer is loaded with the
50   available data from the input file. */
51local int gz_avail(state)
52    gz_statep state;
53{
54    unsigned got;
55    z_streamp strm = &(state->strm);
56
57    if (state->err != Z_OK && state->err != Z_BUF_ERROR)
58        return -1;
59    if (state->eof == 0) {
60        if (strm->avail_in) {       /* copy what's there to the start */
61            unsigned char *p = state->in, *q = strm->next_in;
62            unsigned n = strm->avail_in;
63            do {
64                *p++ = *q++;
65            } while (--n);
66        }
67        if (gz_load(state, state->in + strm->avail_in,
68                    state->size - strm->avail_in, &got) == -1)
69            return -1;
70        strm->avail_in += got;
71        strm->next_in = state->in;
72    }
73    return 0;
74}
75
76/* Look for gzip header, set up for inflate or copy.  state->x.have must be 0.
77   If this is the first time in, allocate required memory.  state->how will be
78   left unchanged if there is no more input data available, will be set to COPY
79   if there is no gzip header and direct copying will be performed, or it will
80   be set to GZIP for decompression.  If direct copying, then leftover input
81   data from the input buffer will be copied to the output buffer.  In that
82   case, all further file reads will be directly to either the output buffer or
83   a user buffer.  If decompressing, the inflate state will be initialized.
84   gz_look() will return 0 on success or -1 on failure. */
85local int gz_look(state)
86    gz_statep state;
87{
88    z_streamp strm = &(state->strm);
89
90    /* allocate read buffers and inflate memory */
91    if (state->size == 0) {
92        /* allocate buffers */
93        state->in = malloc(state->want);
94        state->out = malloc(state->want << 1);
95        if (state->in == NULL || state->out == NULL) {
96            if (state->out != NULL)
97                free(state->out);
98            if (state->in != NULL)
99                free(state->in);
100            gz_error(state, Z_MEM_ERROR, "out of memory");
101            return -1;
102        }
103        state->size = state->want;
104
105        /* allocate inflate memory */
106        state->strm.zalloc = Z_NULL;
107        state->strm.zfree = Z_NULL;
108        state->strm.opaque = Z_NULL;
109        state->strm.avail_in = 0;
110        state->strm.next_in = Z_NULL;
111        if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) {    /* gunzip */
112            free(state->out);
113            free(state->in);
114            state->size = 0;
115            gz_error(state, Z_MEM_ERROR, "out of memory");
116            return -1;
117        }
118    }
119
120    /* get at least the magic bytes in the input buffer */
121    if (strm->avail_in < 2) {
122        if (gz_avail(state) == -1)
123            return -1;
124        if (strm->avail_in == 0)
125            return 0;
126    }
127
128    /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
129       a logical dilemma here when considering the case of a partially written
130       gzip file, to wit, if a single 31 byte is written, then we cannot tell
131       whether this is a single-byte file, or just a partially written gzip
132       file -- for here we assume that if a gzip file is being written, then
133       the header will be written in a single operation, so that reading a
134       single byte is sufficient indication that it is not a gzip file) */
135    if (strm->avail_in > 1 &&
136            strm->next_in[0] == 31 && strm->next_in[1] == 139) {
137        inflateReset(strm);
138        state->how = GZIP;
139        state->direct = 0;
140        return 0;
141    }
142
143    /* no gzip header -- if we were decoding gzip before, then this is trailing
144       garbage.  Ignore the trailing garbage and finish. */
145    if (state->direct == 0) {
146        strm->avail_in = 0;
147        state->eof = 1;
148        state->x.have = 0;
149        return 0;
150    }
151
152    /* doing raw i/o, copy any leftover input to output -- this assumes that
153       the output buffer is larger than the input buffer, which also assures
154       space for gzungetc() */
155    state->x.next = state->out;
156    if (strm->avail_in) {
157        memcpy(state->x.next, strm->next_in, strm->avail_in);
158        state->x.have = strm->avail_in;
159        strm->avail_in = 0;
160    }
161    state->how = COPY;
162    state->direct = 1;
163    return 0;
164}
165
166/* Decompress from input to the provided next_out and avail_out in the state.
167   On return, state->x.have and state->x.next point to the just decompressed
168   data.  If the gzip stream completes, state->how is reset to LOOK to look for
169   the next gzip stream or raw data, once state->x.have is depleted.  Returns 0
170   on success, -1 on failure. */
171local int gz_decomp(state)
172    gz_statep state;
173{
174    int ret = Z_OK;
175    unsigned had;
176    z_streamp strm = &(state->strm);
177
178    /* fill output buffer up to end of deflate stream */
179    had = strm->avail_out;
180    do {
181        /* get more input for inflate() */
182        if (strm->avail_in == 0 && gz_avail(state) == -1)
183            return -1;
184        if (strm->avail_in == 0) {
185            gz_error(state, Z_BUF_ERROR, "unexpected end of file");
186            break;
187        }
188
189        /* decompress and handle errors */
190        ret = inflate(strm, Z_NO_FLUSH);
191        if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
192            gz_error(state, Z_STREAM_ERROR,
193                     "internal error: inflate stream corrupt");
194            return -1;
195        }
196        if (ret == Z_MEM_ERROR) {
197            gz_error(state, Z_MEM_ERROR, "out of memory");
198            return -1;
199        }
200        if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
201            gz_error(state, Z_DATA_ERROR,
202                     strm->msg == NULL ? "compressed data error" : strm->msg);
203            return -1;
204        }
205    } while (strm->avail_out && ret != Z_STREAM_END);
206
207    /* update available output */
208    state->x.have = had - strm->avail_out;
209    state->x.next = strm->next_out - state->x.have;
210
211    /* if the gzip stream completed successfully, look for another */
212    if (ret == Z_STREAM_END)
213        state->how = LOOK;
214
215    /* good decompression */
216    return 0;
217}
218
219/* Fetch data and put it in the output buffer.  Assumes state->x.have is 0.
220   Data is either copied from the input file or decompressed from the input
221   file depending on state->how.  If state->how is LOOK, then a gzip header is
222   looked for to determine whether to copy or decompress.  Returns -1 on error,
223   otherwise 0.  gz_fetch() will leave state->how as COPY or GZIP unless the
224   end of the input file has been reached and all data has been processed.  */
225local int gz_fetch(state)
226    gz_statep state;
227{
228    z_streamp strm = &(state->strm);
229
230    do {
231        switch(state->how) {
232        case LOOK:      /* -> LOOK, COPY (only if never GZIP), or GZIP */
233            if (gz_look(state) == -1)
234                return -1;
235            if (state->how == LOOK)
236                return 0;
237            break;
238        case COPY:      /* -> COPY */
239            if (gz_load(state, state->out, state->size << 1, &(state->x.have))
240                    == -1)
241                return -1;
242            state->x.next = state->out;
243            return 0;
244        case GZIP:      /* -> GZIP or LOOK (if end of gzip stream) */
245            strm->avail_out = state->size << 1;
246            strm->next_out = state->out;
247            if (gz_decomp(state) == -1)
248                return -1;
249        }
250    } while (state->x.have == 0 && (!state->eof || strm->avail_in));
251    return 0;
252}
253
254/* Skip len uncompressed bytes of output.  Return -1 on error, 0 on success. */
255local int gz_skip(state, len)
256    gz_statep state;
257    z_off64_t len;
258{
259    unsigned n;
260
261    /* skip over len bytes or reach end-of-file, whichever comes first */
262    while (len)
263        /* skip over whatever is in output buffer */
264        if (state->x.have) {
265            n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ?
266                (unsigned)len : state->x.have;
267            state->x.have -= n;
268            state->x.next += n;
269            state->x.pos += n;
270            len -= n;
271        }
272
273        /* output buffer empty -- return if we're at the end of the input */
274        else if (state->eof && state->strm.avail_in == 0)
275            break;
276
277        /* need more data to skip -- load up output buffer */
278        else {
279            /* get more output, looking for header if required */
280            if (gz_fetch(state) == -1)
281                return -1;
282        }
283    return 0;
284}
285
286/* -- see zlib.h -- */
287int ZEXPORT gzread(file, buf, len)
288    gzFile file;
289    voidp buf;
290    unsigned len;
291{
292    unsigned got, n;
293    gz_statep state;
294    z_streamp strm;
295
296    /* get internal structure */
297    if (file == NULL)
298        return -1;
299    state = (gz_statep)file;
300    strm = &(state->strm);
301
302    /* check that we're reading and that there's no (serious) error */
303    if (state->mode != GZ_READ ||
304            (state->err != Z_OK && state->err != Z_BUF_ERROR))
305        return -1;
306
307    /* since an int is returned, make sure len fits in one, otherwise return
308       with an error (this avoids the flaw in the interface) */
309    if ((int)len < 0) {
310        gz_error(state, Z_DATA_ERROR, "requested length does not fit in int");
311        return -1;
312    }
313
314    /* if len is zero, avoid unnecessary operations */
315    if (len == 0)
316        return 0;
317
318    /* process a skip request */
319    if (state->seek) {
320        state->seek = 0;
321        if (gz_skip(state, state->skip) == -1)
322            return -1;
323    }
324
325    /* get len bytes to buf, or less than len if at the end */
326    got = 0;
327    do {
328        /* first just try copying data from the output buffer */
329        if (state->x.have) {
330            n = state->x.have > len ? len : state->x.have;
331            memcpy(buf, state->x.next, n);
332            state->x.next += n;
333            state->x.have -= n;
334        }
335
336        /* output buffer empty -- return if we're at the end of the input */
337        else if (state->eof && strm->avail_in == 0) {
338            state->past = 1;        /* tried to read past end */
339            break;
340        }
341
342        /* need output data -- for small len or new stream load up our output
343           buffer */
344        else if (state->how == LOOK || len < (state->size << 1)) {
345            /* get more output, looking for header if required */
346            if (gz_fetch(state) == -1)
347                return -1;
348            continue;       /* no progress yet -- go back to copy above */
349            /* the copy above assures that we will leave with space in the
350               output buffer, allowing at least one gzungetc() to succeed */
351        }
352
353        /* large len -- read directly into user buffer */
354        else if (state->how == COPY) {      /* read directly */
355            if (gz_load(state, buf, len, &n) == -1)
356                return -1;
357        }
358
359        /* large len -- decompress directly into user buffer */
360        else {  /* state->how == GZIP */
361            strm->avail_out = len;
362            strm->next_out = buf;
363            if (gz_decomp(state) == -1)
364                return -1;
365            n = state->x.have;
366            state->x.have = 0;
367        }
368
369        /* update progress */
370        len -= n;
371        buf = (char *)buf + n;
372        got += n;
373        state->x.pos += n;
374    } while (len);
375
376    /* return number of bytes read into user buffer (will fit in int) */
377    return (int)got;
378}
379
380/* -- see zlib.h -- */
381#undef gzgetc
382int ZEXPORT gzgetc(file)
383    gzFile file;
384{
385    int ret;
386    unsigned char buf[1];
387    gz_statep state;
388
389    /* get internal structure */
390    if (file == NULL)
391        return -1;
392    state = (gz_statep)file;
393
394    /* check that we're reading and that there's no (serious) error */
395    if (state->mode != GZ_READ ||
396        (state->err != Z_OK && state->err != Z_BUF_ERROR))
397        return -1;
398
399    /* try output buffer (no need to check for skip request) */
400    if (state->x.have) {
401        state->x.have--;
402        state->x.pos++;
403        return *(state->x.next)++;
404    }
405
406    /* nothing there -- try gzread() */
407    ret = gzread(file, buf, 1);
408    return ret < 1 ? -1 : buf[0];
409}
410
411int ZEXPORT gzgetc_(file)
412gzFile file;
413{
414    return gzgetc(file);
415}
416
417/* -- see zlib.h -- */
418int ZEXPORT gzungetc(c, file)
419    int c;
420    gzFile file;
421{
422    gz_statep state;
423
424    /* get internal structure */
425    if (file == NULL)
426        return -1;
427    state = (gz_statep)file;
428
429    /* check that we're reading and that there's no (serious) error */
430    if (state->mode != GZ_READ ||
431        (state->err != Z_OK && state->err != Z_BUF_ERROR))
432        return -1;
433
434    /* process a skip request */
435    if (state->seek) {
436        state->seek = 0;
437        if (gz_skip(state, state->skip) == -1)
438            return -1;
439    }
440
441    /* can't push EOF */
442    if (c < 0)
443        return -1;
444
445    /* if output buffer empty, put byte at end (allows more pushing) */
446    if (state->x.have == 0) {
447        state->x.have = 1;
448        state->x.next = state->out + (state->size << 1) - 1;
449        state->x.next[0] = c;
450        state->x.pos--;
451        state->past = 0;
452        return c;
453    }
454
455    /* if no room, give up (must have already done a gzungetc()) */
456    if (state->x.have == (state->size << 1)) {
457        gz_error(state, Z_DATA_ERROR, "out of room to push characters");
458        return -1;
459    }
460
461    /* slide output data if needed and insert byte before existing data */
462    if (state->x.next == state->out) {
463        unsigned char *src = state->out + state->x.have;
464        unsigned char *dest = state->out + (state->size << 1);
465        while (src > state->out)
466            *--dest = *--src;
467        state->x.next = dest;
468    }
469    state->x.have++;
470    state->x.next--;
471    state->x.next[0] = c;
472    state->x.pos--;
473    state->past = 0;
474    return c;
475}
476
477/* -- see zlib.h -- */
478char * ZEXPORT gzgets(file, buf, len)
479    gzFile file;
480    char *buf;
481    int len;
482{
483    unsigned left, n;
484    char *str;
485    unsigned char *eol;
486    gz_statep state;
487
488    /* check parameters and get internal structure */
489    if (file == NULL || buf == NULL || len < 1)
490        return NULL;
491    state = (gz_statep)file;
492
493    /* check that we're reading and that there's no (serious) error */
494    if (state->mode != GZ_READ ||
495        (state->err != Z_OK && state->err != Z_BUF_ERROR))
496        return NULL;
497
498    /* process a skip request */
499    if (state->seek) {
500        state->seek = 0;
501        if (gz_skip(state, state->skip) == -1)
502            return NULL;
503    }
504
505    /* copy output bytes up to new line or len - 1, whichever comes first --
506       append a terminating zero to the string (we don't check for a zero in
507       the contents, let the user worry about that) */
508    str = buf;
509    left = (unsigned)len - 1;
510    if (left) do {
511        /* assure that something is in the output buffer */
512        if (state->x.have == 0 && gz_fetch(state) == -1)
513            return NULL;                /* error */
514        if (state->x.have == 0) {       /* end of file */
515            state->past = 1;            /* read past end */
516            break;                      /* return what we have */
517        }
518
519        /* look for end-of-line in current output buffer */
520        n = state->x.have > left ? left : state->x.have;
521        eol = memchr(state->x.next, '\n', n);
522        if (eol != NULL)
523            n = (unsigned)(eol - state->x.next) + 1;
524
525        /* copy through end-of-line, or remainder if not found */
526        memcpy(buf, state->x.next, n);
527        state->x.have -= n;
528        state->x.next += n;
529        state->x.pos += n;
530        left -= n;
531        buf += n;
532    } while (left && eol == NULL);
533
534    /* return terminated string, or if nothing, end of file */
535    if (buf == str)
536        return NULL;
537    buf[0] = 0;
538    return str;
539}
540
541/* -- see zlib.h -- */
542int ZEXPORT gzdirect(file)
543    gzFile file;
544{
545    gz_statep state;
546
547    /* get internal structure */
548    if (file == NULL)
549        return 0;
550    state = (gz_statep)file;
551
552    /* if the state is not known, but we can find out, then do so (this is
553       mainly for right after a gzopen() or gzdopen()) */
554    if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
555        (void)gz_look(state);
556
557    /* return 1 if transparent, 0 if processing a gzip stream */
558    return state->direct;
559}
560
561/* -- see zlib.h -- */
562int ZEXPORT gzclose_r(file)
563    gzFile file;
564{
565    int ret, err;
566    gz_statep state;
567
568    /* get internal structure */
569    if (file == NULL)
570        return Z_STREAM_ERROR;
571    state = (gz_statep)file;
572
573    /* check that we're reading */
574    if (state->mode != GZ_READ)
575        return Z_STREAM_ERROR;
576
577    /* free memory and close file */
578    if (state->size) {
579        inflateEnd(&(state->strm));
580        free(state->out);
581        free(state->in);
582    }
583    err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
584    gz_error(state, Z_OK, NULL);
585    free(state->path);
586    ret = close(state->fd);
587    free(state);
588    return ret ? Z_ERRNO : err;
589}
590