1/* gzread.c -- zlib functions for reading gzip files
2 * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 */
5
6#include "gzguts.h"
7
8/* Local functions */
9local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
10local int gz_avail OF((gz_statep));
11local int gz_look OF((gz_statep));
12local int gz_decomp OF((gz_statep));
13local int gz_fetch OF((gz_statep));
14local int gz_skip OF((gz_statep, z_off64_t));
15local z_size_t gz_read OF((gz_statep, voidp, z_size_t));
16
17/* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
18   state->fd, and update state->eof, state->err, and state->msg as appropriate.
19   This function needs to loop on read(), since read() is not guaranteed to
20   read the number of bytes requested, depending on the type of descriptor. */
21local int gz_load(state, buf, len, have)
22    gz_statep state;
23    unsigned char *buf;
24    unsigned len;
25    unsigned *have;
26{
27    int ret;
28    unsigned get, max = ((unsigned)-1 >> 2) + 1;
29
30    *have = 0;
31    do {
32        get = len - *have;
33        if (get > max)
34            get = max;
35        ret = read(state->fd, buf + *have, get);
36        if (ret <= 0)
37            break;
38        *have += (unsigned)ret;
39    } while (*have < len);
40    if (ret < 0) {
41        gz_error(state, Z_ERRNO, zstrerror());
42        return -1;
43    }
44    if (ret == 0)
45        state->eof = 1;
46    return 0;
47}
48
49/* Load up input buffer and set eof flag if last data loaded -- return -1 on
50   error, 0 otherwise.  Note that the eof flag is set when the end of the input
51   file is reached, even though there may be unused data in the buffer.  Once
52   that data has been used, no more attempts will be made to read the file.
53   If strm->avail_in != 0, then the current data is moved to the beginning of
54   the input buffer, and then the remainder of the buffer is loaded with the
55   available data from the input file. */
56local int gz_avail(state)
57    gz_statep state;
58{
59    unsigned got;
60    z_streamp strm = &(state->strm);
61
62    if (state->err != Z_OK && state->err != Z_BUF_ERROR)
63        return -1;
64    if (state->eof == 0) {
65        if (strm->avail_in) {       /* copy what's there to the start */
66            unsigned char *p = state->in;
67            unsigned const char *q = strm->next_in;
68            unsigned n = strm->avail_in;
69            do {
70                *p++ = *q++;
71            } while (--n);
72        }
73        if (gz_load(state, state->in + strm->avail_in,
74                    state->size - strm->avail_in, &got) == -1)
75            return -1;
76        strm->avail_in += got;
77        strm->next_in = state->in;
78    }
79    return 0;
80}
81
82/* Look for gzip header, set up for inflate or copy.  state->x.have must be 0.
83   If this is the first time in, allocate required memory.  state->how will be
84   left unchanged if there is no more input data available, will be set to COPY
85   if there is no gzip header and direct copying will be performed, or it will
86   be set to GZIP for decompression.  If direct copying, then leftover input
87   data from the input buffer will be copied to the output buffer.  In that
88   case, all further file reads will be directly to either the output buffer or
89   a user buffer.  If decompressing, the inflate state will be initialized.
90   gz_look() will return 0 on success or -1 on failure. */
91local int gz_look(state)
92    gz_statep state;
93{
94    z_streamp strm = &(state->strm);
95
96    /* allocate read buffers and inflate memory */
97    if (state->size == 0) {
98        /* allocate buffers */
99        state->in = (unsigned char *)malloc(state->want);
100        state->out = (unsigned char *)malloc(state->want << 1);
101        if (state->in == NULL || state->out == NULL) {
102            free(state->out);
103            free(state->in);
104            gz_error(state, Z_MEM_ERROR, "out of memory");
105            return -1;
106        }
107        state->size = state->want;
108
109        /* allocate inflate memory */
110        state->strm.zalloc = Z_NULL;
111        state->strm.zfree = Z_NULL;
112        state->strm.opaque = Z_NULL;
113        state->strm.avail_in = 0;
114        state->strm.next_in = Z_NULL;
115        if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) {    /* gunzip */
116            free(state->out);
117            free(state->in);
118            state->size = 0;
119            gz_error(state, Z_MEM_ERROR, "out of memory");
120            return -1;
121        }
122    }
123
124    /* get at least the magic bytes in the input buffer */
125    if (strm->avail_in < 2) {
126        if (gz_avail(state) == -1)
127            return -1;
128        if (strm->avail_in == 0)
129            return 0;
130    }
131
132    /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
133       a logical dilemma here when considering the case of a partially written
134       gzip file, to wit, if a single 31 byte is written, then we cannot tell
135       whether this is a single-byte file, or just a partially written gzip
136       file -- for here we assume that if a gzip file is being written, then
137       the header will be written in a single operation, so that reading a
138       single byte is sufficient indication that it is not a gzip file) */
139    if (strm->avail_in > 1 &&
140            strm->next_in[0] == 31 && strm->next_in[1] == 139) {
141        inflateReset(strm);
142        state->how = GZIP;
143        state->direct = 0;
144        return 0;
145    }
146
147    /* no gzip header -- if we were decoding gzip before, then this is trailing
148       garbage.  Ignore the trailing garbage and finish. */
149    if (state->direct == 0) {
150        strm->avail_in = 0;
151        state->eof = 1;
152        state->x.have = 0;
153        return 0;
154    }
155
156    /* doing raw i/o, copy any leftover input to output -- this assumes that
157       the output buffer is larger than the input buffer, which also assures
158       space for gzungetc() */
159    state->x.next = state->out;
160    if (strm->avail_in) {
161        memcpy(state->x.next, strm->next_in, strm->avail_in);
162        state->x.have = strm->avail_in;
163        strm->avail_in = 0;
164    }
165    state->how = COPY;
166    state->direct = 1;
167    return 0;
168}
169
170/* Decompress from input to the provided next_out and avail_out in the state.
171   On return, state->x.have and state->x.next point to the just decompressed
172   data.  If the gzip stream completes, state->how is reset to LOOK to look for
173   the next gzip stream or raw data, once state->x.have is depleted.  Returns 0
174   on success, -1 on failure. */
175local int gz_decomp(state)
176    gz_statep state;
177{
178    int ret = Z_OK;
179    unsigned had;
180    z_streamp strm = &(state->strm);
181
182    /* fill output buffer up to end of deflate stream */
183    had = strm->avail_out;
184    do {
185        /* get more input for inflate() */
186        if (strm->avail_in == 0 && gz_avail(state) == -1)
187            return -1;
188        if (strm->avail_in == 0) {
189            gz_error(state, Z_BUF_ERROR, "unexpected end of file");
190            break;
191        }
192
193        /* decompress and handle errors */
194        ret = inflate(strm, Z_NO_FLUSH);
195        if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
196            gz_error(state, Z_STREAM_ERROR,
197                     "internal error: inflate stream corrupt");
198            return -1;
199        }
200        if (ret == Z_MEM_ERROR) {
201            gz_error(state, Z_MEM_ERROR, "out of memory");
202            return -1;
203        }
204        if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
205            gz_error(state, Z_DATA_ERROR,
206                     strm->msg == NULL ? "compressed data error" : strm->msg);
207            return -1;
208        }
209    } while (strm->avail_out && ret != Z_STREAM_END);
210
211    /* update available output */
212    state->x.have = had - strm->avail_out;
213    state->x.next = strm->next_out - state->x.have;
214
215    /* if the gzip stream completed successfully, look for another */
216    if (ret == Z_STREAM_END)
217        state->how = LOOK;
218
219    /* good decompression */
220    return 0;
221}
222
223/* Fetch data and put it in the output buffer.  Assumes state->x.have is 0.
224   Data is either copied from the input file or decompressed from the input
225   file depending on state->how.  If state->how is LOOK, then a gzip header is
226   looked for to determine whether to copy or decompress.  Returns -1 on error,
227   otherwise 0.  gz_fetch() will leave state->how as COPY or GZIP unless the
228   end of the input file has been reached and all data has been processed.  */
229local int gz_fetch(state)
230    gz_statep state;
231{
232    z_streamp strm = &(state->strm);
233
234    do {
235        switch(state->how) {
236        case LOOK:      /* -> LOOK, COPY (only if never GZIP), or GZIP */
237            if (gz_look(state) == -1)
238                return -1;
239            if (state->how == LOOK)
240                return 0;
241            break;
242        case COPY:      /* -> COPY */
243            if (gz_load(state, state->out, state->size << 1, &(state->x.have))
244                    == -1)
245                return -1;
246            state->x.next = state->out;
247            return 0;
248        case GZIP:      /* -> GZIP or LOOK (if end of gzip stream) */
249            strm->avail_out = state->size << 1;
250            strm->next_out = state->out;
251            if (gz_decomp(state) == -1)
252                return -1;
253        }
254    } while (state->x.have == 0 && (!state->eof || strm->avail_in));
255    return 0;
256}
257
258/* Skip len uncompressed bytes of output.  Return -1 on error, 0 on success. */
259local int gz_skip(state, len)
260    gz_statep state;
261    z_off64_t len;
262{
263    unsigned n;
264
265    /* skip over len bytes or reach end-of-file, whichever comes first */
266    while (len)
267        /* skip over whatever is in output buffer */
268        if (state->x.have) {
269            n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ?
270                (unsigned)len : state->x.have;
271            state->x.have -= n;
272            state->x.next += n;
273            state->x.pos += n;
274            len -= n;
275        }
276
277        /* output buffer empty -- return if we're at the end of the input */
278        else if (state->eof && state->strm.avail_in == 0)
279            break;
280
281        /* need more data to skip -- load up output buffer */
282        else {
283            /* get more output, looking for header if required */
284            if (gz_fetch(state) == -1)
285                return -1;
286        }
287    return 0;
288}
289
290/* Read len bytes into buf from file, or less than len up to the end of the
291   input.  Return the number of bytes read.  If zero is returned, either the
292   end of file was reached, or there was an error.  state->err must be
293   consulted in that case to determine which. */
294local z_size_t gz_read(state, buf, len)
295    gz_statep state;
296    voidp buf;
297    z_size_t len;
298{
299    z_size_t got;
300    unsigned n;
301
302    /* if len is zero, avoid unnecessary operations */
303    if (len == 0)
304        return 0;
305
306    /* process a skip request */
307    if (state->seek) {
308        state->seek = 0;
309        if (gz_skip(state, state->skip) == -1)
310            return 0;
311    }
312
313    /* get len bytes to buf, or less than len if at the end */
314    got = 0;
315    do {
316        /* set n to the maximum amount of len that fits in an unsigned int */
317        n = -1;
318        if (n > len)
319            n = len;
320
321        /* first just try copying data from the output buffer */
322        if (state->x.have) {
323            if (state->x.have < n)
324                n = state->x.have;
325            memcpy(buf, state->x.next, n);
326            state->x.next += n;
327            state->x.have -= n;
328        }
329
330        /* output buffer empty -- return if we're at the end of the input */
331        else if (state->eof && state->strm.avail_in == 0) {
332            state->past = 1;        /* tried to read past end */
333            break;
334        }
335
336        /* need output data -- for small len or new stream load up our output
337           buffer */
338        else if (state->how == LOOK || n < (state->size << 1)) {
339            /* get more output, looking for header if required */
340            if (gz_fetch(state) == -1)
341                return 0;
342            continue;       /* no progress yet -- go back to copy above */
343            /* the copy above assures that we will leave with space in the
344               output buffer, allowing at least one gzungetc() to succeed */
345        }
346
347        /* large len -- read directly into user buffer */
348        else if (state->how == COPY) {      /* read directly */
349            if (gz_load(state, (unsigned char *)buf, n, &n) == -1)
350                return 0;
351        }
352
353        /* large len -- decompress directly into user buffer */
354        else {  /* state->how == GZIP */
355            state->strm.avail_out = n;
356            state->strm.next_out = (unsigned char *)buf;
357            if (gz_decomp(state) == -1)
358                return 0;
359            n = state->x.have;
360            state->x.have = 0;
361        }
362
363        /* update progress */
364        len -= n;
365        buf = (char *)buf + n;
366        got += n;
367        state->x.pos += n;
368    } while (len);
369
370    /* return number of bytes read into user buffer */
371    return got;
372}
373
374/* -- see zlib.h -- */
375int ZEXPORT gzread(file, buf, len)
376    gzFile file;
377    voidp buf;
378    unsigned len;
379{
380    gz_statep state;
381
382    /* get internal structure */
383    if (file == NULL)
384        return -1;
385    state = (gz_statep)file;
386
387    /* check that we're reading and that there's no (serious) error */
388    if (state->mode != GZ_READ ||
389            (state->err != Z_OK && state->err != Z_BUF_ERROR))
390        return -1;
391
392    /* since an int is returned, make sure len fits in one, otherwise return
393       with an error (this avoids a flaw in the interface) */
394    if ((int)len < 0) {
395        gz_error(state, Z_STREAM_ERROR, "request does not fit in an int");
396        return -1;
397    }
398
399    /* read len or fewer bytes to buf */
400    len = gz_read(state, buf, len);
401
402    /* check for an error */
403    if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR)
404        return -1;
405
406    /* return the number of bytes read (this is assured to fit in an int) */
407    return (int)len;
408}
409
410/* -- see zlib.h -- */
411z_size_t ZEXPORT gzfread(buf, size, nitems, file)
412    voidp buf;
413    z_size_t size;
414    z_size_t nitems;
415    gzFile file;
416{
417    z_size_t len;
418    gz_statep state;
419
420    /* get internal structure */
421    if (file == NULL)
422        return 0;
423    state = (gz_statep)file;
424
425    /* check that we're reading and that there's no (serious) error */
426    if (state->mode != GZ_READ ||
427            (state->err != Z_OK && state->err != Z_BUF_ERROR))
428        return 0;
429
430    /* compute bytes to read -- error on overflow */
431    len = nitems * size;
432    if (size && len / size != nitems) {
433        gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
434        return 0;
435    }
436
437    /* read len or fewer bytes to buf, return the number of full items read */
438    return len ? gz_read(state, buf, len) / size : 0;
439}
440
441/* -- see zlib.h -- */
442#ifdef Z_PREFIX_SET
443#  undef z_gzgetc
444#else
445#  undef gzgetc
446#endif
447int ZEXPORT gzgetc(file)
448    gzFile file;
449{
450    int ret;
451    unsigned char buf[1];
452    gz_statep state;
453
454    /* get internal structure */
455    if (file == NULL)
456        return -1;
457    state = (gz_statep)file;
458
459    /* check that we're reading and that there's no (serious) error */
460    if (state->mode != GZ_READ ||
461        (state->err != Z_OK && state->err != Z_BUF_ERROR))
462        return -1;
463
464    /* try output buffer (no need to check for skip request) */
465    if (state->x.have) {
466        state->x.have--;
467        state->x.pos++;
468        return *(state->x.next)++;
469    }
470
471    /* nothing there -- try gz_read() */
472    ret = gz_read(state, buf, 1);
473    return ret < 1 ? -1 : buf[0];
474}
475
476int ZEXPORT gzgetc_(file)
477gzFile file;
478{
479    return gzgetc(file);
480}
481
482/* -- see zlib.h -- */
483int ZEXPORT gzungetc(c, file)
484    int c;
485    gzFile file;
486{
487    gz_statep state;
488
489    /* get internal structure */
490    if (file == NULL)
491        return -1;
492    state = (gz_statep)file;
493
494    /* check that we're reading and that there's no (serious) error */
495    if (state->mode != GZ_READ ||
496        (state->err != Z_OK && state->err != Z_BUF_ERROR))
497        return -1;
498
499    /* process a skip request */
500    if (state->seek) {
501        state->seek = 0;
502        if (gz_skip(state, state->skip) == -1)
503            return -1;
504    }
505
506    /* can't push EOF */
507    if (c < 0)
508        return -1;
509
510    /* if output buffer empty, put byte at end (allows more pushing) */
511    if (state->x.have == 0) {
512        state->x.have = 1;
513        state->x.next = state->out + (state->size << 1) - 1;
514        state->x.next[0] = (unsigned char)c;
515        state->x.pos--;
516        state->past = 0;
517        return c;
518    }
519
520    /* if no room, give up (must have already done a gzungetc()) */
521    if (state->x.have == (state->size << 1)) {
522        gz_error(state, Z_DATA_ERROR, "out of room to push characters");
523        return -1;
524    }
525
526    /* slide output data if needed and insert byte before existing data */
527    if (state->x.next == state->out) {
528        unsigned char *src = state->out + state->x.have;
529        unsigned char *dest = state->out + (state->size << 1);
530        while (src > state->out)
531            *--dest = *--src;
532        state->x.next = dest;
533    }
534    state->x.have++;
535    state->x.next--;
536    state->x.next[0] = (unsigned char)c;
537    state->x.pos--;
538    state->past = 0;
539    return c;
540}
541
542/* -- see zlib.h -- */
543char * ZEXPORT gzgets(file, buf, len)
544    gzFile file;
545    char *buf;
546    int len;
547{
548    unsigned left, n;
549    char *str;
550    unsigned char *eol;
551    gz_statep state;
552
553    /* check parameters and get internal structure */
554    if (file == NULL || buf == NULL || len < 1)
555        return NULL;
556    state = (gz_statep)file;
557
558    /* check that we're reading and that there's no (serious) error */
559    if (state->mode != GZ_READ ||
560        (state->err != Z_OK && state->err != Z_BUF_ERROR))
561        return NULL;
562
563    /* process a skip request */
564    if (state->seek) {
565        state->seek = 0;
566        if (gz_skip(state, state->skip) == -1)
567            return NULL;
568    }
569
570    /* copy output bytes up to new line or len - 1, whichever comes first --
571       append a terminating zero to the string (we don't check for a zero in
572       the contents, let the user worry about that) */
573    str = buf;
574    left = (unsigned)len - 1;
575    if (left) do {
576        /* assure that something is in the output buffer */
577        if (state->x.have == 0 && gz_fetch(state) == -1)
578            return NULL;                /* error */
579        if (state->x.have == 0) {       /* end of file */
580            state->past = 1;            /* read past end */
581            break;                      /* return what we have */
582        }
583
584        /* look for end-of-line in current output buffer */
585        n = state->x.have > left ? left : state->x.have;
586        eol = (unsigned char *)memchr(state->x.next, '\n', n);
587        if (eol != NULL)
588            n = (unsigned)(eol - state->x.next) + 1;
589
590        /* copy through end-of-line, or remainder if not found */
591        memcpy(buf, state->x.next, n);
592        state->x.have -= n;
593        state->x.next += n;
594        state->x.pos += n;
595        left -= n;
596        buf += n;
597    } while (left && eol == NULL);
598
599    /* return terminated string, or if nothing, end of file */
600    if (buf == str)
601        return NULL;
602    buf[0] = 0;
603    return str;
604}
605
606/* -- see zlib.h -- */
607int ZEXPORT gzdirect(file)
608    gzFile file;
609{
610    gz_statep state;
611
612    /* get internal structure */
613    if (file == NULL)
614        return 0;
615    state = (gz_statep)file;
616
617    /* if the state is not known, but we can find out, then do so (this is
618       mainly for right after a gzopen() or gzdopen()) */
619    if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
620        (void)gz_look(state);
621
622    /* return 1 if transparent, 0 if processing a gzip stream */
623    return state->direct;
624}
625
626/* -- see zlib.h -- */
627int ZEXPORT gzclose_r(file)
628    gzFile file;
629{
630    int ret, err;
631    gz_statep state;
632
633    /* get internal structure */
634    if (file == NULL)
635        return Z_STREAM_ERROR;
636    state = (gz_statep)file;
637
638    /* check that we're reading */
639    if (state->mode != GZ_READ)
640        return Z_STREAM_ERROR;
641
642    /* free memory and close file */
643    if (state->size) {
644        inflateEnd(&(state->strm));
645        free(state->out);
646        free(state->in);
647    }
648    err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
649    gz_error(state, Z_OK, NULL);
650    free(state->path);
651    ret = close(state->fd);
652    free(state);
653    return ret ? Z_ERRNO : err;
654}
655