1/* gzread.c -- zlib functions for reading gzip files
2 * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 */
5
6/* $FreeBSD: stable/11/sys/contrib/zlib/gzread.c 311285 2017-01-04 16:09:08Z delphij $ */
7
8#include "gzguts.h"
9#include <unistd.h>
10
/* Local functions */
/* read() loop filling a caller buffer; last argument receives the count */
local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
/* refill the input buffer state->in from the file */
local int gz_avail OF((gz_statep));
/* check for a gzip header and choose between GZIP and COPY handling */
local int gz_look OF((gz_statep));
/* inflate into strm->next_out, publishing the result in state->x */
local int gz_decomp OF((gz_statep));
/* produce output in state->out according to state->how */
local int gz_fetch OF((gz_statep));
/* discard the given number of uncompressed bytes */
local int gz_skip OF((gz_statep, z_off64_t));
/* common worker behind gzread(), gzfread() and gzgetc() */
local z_size_t gz_read OF((gz_statep, voidp, z_size_t));
19
20/* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
21   state->fd, and update state->eof, state->err, and state->msg as appropriate.
22   This function needs to loop on read(), since read() is not guaranteed to
23   read the number of bytes requested, depending on the type of descriptor. */
24local int gz_load(state, buf, len, have)
25    gz_statep state;
26    unsigned char *buf;
27    unsigned len;
28    unsigned *have;
29{
30    int ret;
31    unsigned get, max = ((unsigned)-1 >> 2) + 1;
32
33    *have = 0;
34    do {
35        get = len - *have;
36        if (get > max)
37            get = max;
38        ret = read(state->fd, buf + *have, get);
39        if (ret <= 0)
40            break;
41        *have += (unsigned)ret;
42    } while (*have < len);
43    if (ret < 0) {
44        gz_error(state, Z_ERRNO, zstrerror());
45        return -1;
46    }
47    if (ret == 0)
48        state->eof = 1;
49    return 0;
50}
51
52/* Load up input buffer and set eof flag if last data loaded -- return -1 on
53   error, 0 otherwise.  Note that the eof flag is set when the end of the input
54   file is reached, even though there may be unused data in the buffer.  Once
55   that data has been used, no more attempts will be made to read the file.
56   If strm->avail_in != 0, then the current data is moved to the beginning of
57   the input buffer, and then the remainder of the buffer is loaded with the
58   available data from the input file. */
59local int gz_avail(state)
60    gz_statep state;
61{
62    unsigned got;
63    z_streamp strm = &(state->strm);
64
65    if (state->err != Z_OK && state->err != Z_BUF_ERROR)
66        return -1;
67    if (state->eof == 0) {
68        if (strm->avail_in) {       /* copy what's there to the start */
69            unsigned char *p = state->in;
70            unsigned const char *q = strm->next_in;
71            unsigned n = strm->avail_in;
72            do {
73                *p++ = *q++;
74            } while (--n);
75        }
76        if (gz_load(state, state->in + strm->avail_in,
77                    state->size - strm->avail_in, &got) == -1)
78            return -1;
79        strm->avail_in += got;
80        strm->next_in = state->in;
81    }
82    return 0;
83}
84
85/* Look for gzip header, set up for inflate or copy.  state->x.have must be 0.
86   If this is the first time in, allocate required memory.  state->how will be
87   left unchanged if there is no more input data available, will be set to COPY
88   if there is no gzip header and direct copying will be performed, or it will
89   be set to GZIP for decompression.  If direct copying, then leftover input
90   data from the input buffer will be copied to the output buffer.  In that
91   case, all further file reads will be directly to either the output buffer or
92   a user buffer.  If decompressing, the inflate state will be initialized.
93   gz_look() will return 0 on success or -1 on failure. */
94local int gz_look(state)
95    gz_statep state;
96{
97    z_streamp strm = &(state->strm);
98
99    /* allocate read buffers and inflate memory */
100    if (state->size == 0) {
101        /* allocate buffers */
102        state->in = (unsigned char *)malloc(state->want);
103        state->out = (unsigned char *)malloc(state->want << 1);
104        if (state->in == NULL || state->out == NULL) {
105            free(state->out);
106            free(state->in);
107            gz_error(state, Z_MEM_ERROR, "out of memory");
108            return -1;
109        }
110        state->size = state->want;
111
112        /* allocate inflate memory */
113        state->strm.zalloc = Z_NULL;
114        state->strm.zfree = Z_NULL;
115        state->strm.opaque = Z_NULL;
116        state->strm.avail_in = 0;
117        state->strm.next_in = Z_NULL;
118        if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) {    /* gunzip */
119            free(state->out);
120            free(state->in);
121            state->size = 0;
122            gz_error(state, Z_MEM_ERROR, "out of memory");
123            return -1;
124        }
125    }
126
127    /* get at least the magic bytes in the input buffer */
128    if (strm->avail_in < 2) {
129        if (gz_avail(state) == -1)
130            return -1;
131        if (strm->avail_in == 0)
132            return 0;
133    }
134
135    /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
136       a logical dilemma here when considering the case of a partially written
137       gzip file, to wit, if a single 31 byte is written, then we cannot tell
138       whether this is a single-byte file, or just a partially written gzip
139       file -- for here we assume that if a gzip file is being written, then
140       the header will be written in a single operation, so that reading a
141       single byte is sufficient indication that it is not a gzip file) */
142    if (strm->avail_in > 1 &&
143            strm->next_in[0] == 31 && strm->next_in[1] == 139) {
144        inflateReset(strm);
145        state->how = GZIP;
146        state->direct = 0;
147        return 0;
148    }
149
150    /* no gzip header -- if we were decoding gzip before, then this is trailing
151       garbage.  Ignore the trailing garbage and finish. */
152    if (state->direct == 0) {
153        strm->avail_in = 0;
154        state->eof = 1;
155        state->x.have = 0;
156        return 0;
157    }
158
159    /* doing raw i/o, copy any leftover input to output -- this assumes that
160       the output buffer is larger than the input buffer, which also assures
161       space for gzungetc() */
162    state->x.next = state->out;
163    if (strm->avail_in) {
164        memcpy(state->x.next, strm->next_in, strm->avail_in);
165        state->x.have = strm->avail_in;
166        strm->avail_in = 0;
167    }
168    state->how = COPY;
169    state->direct = 1;
170    return 0;
171}
172
173/* Decompress from input to the provided next_out and avail_out in the state.
174   On return, state->x.have and state->x.next point to the just decompressed
175   data.  If the gzip stream completes, state->how is reset to LOOK to look for
176   the next gzip stream or raw data, once state->x.have is depleted.  Returns 0
177   on success, -1 on failure. */
178local int gz_decomp(state)
179    gz_statep state;
180{
181    int ret = Z_OK;
182    unsigned had;
183    z_streamp strm = &(state->strm);
184
185    /* fill output buffer up to end of deflate stream */
186    had = strm->avail_out;
187    do {
188        /* get more input for inflate() */
189        if (strm->avail_in == 0 && gz_avail(state) == -1)
190            return -1;
191        if (strm->avail_in == 0) {
192            gz_error(state, Z_BUF_ERROR, "unexpected end of file");
193            break;
194        }
195
196        /* decompress and handle errors */
197        ret = inflate(strm, Z_NO_FLUSH);
198        if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
199            gz_error(state, Z_STREAM_ERROR,
200                     "internal error: inflate stream corrupt");
201            return -1;
202        }
203        if (ret == Z_MEM_ERROR) {
204            gz_error(state, Z_MEM_ERROR, "out of memory");
205            return -1;
206        }
207        if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
208            gz_error(state, Z_DATA_ERROR,
209                     strm->msg == NULL ? "compressed data error" : strm->msg);
210            return -1;
211        }
212    } while (strm->avail_out && ret != Z_STREAM_END);
213
214    /* update available output */
215    state->x.have = had - strm->avail_out;
216    state->x.next = strm->next_out - state->x.have;
217
218    /* if the gzip stream completed successfully, look for another */
219    if (ret == Z_STREAM_END)
220        state->how = LOOK;
221
222    /* good decompression */
223    return 0;
224}
225
226/* Fetch data and put it in the output buffer.  Assumes state->x.have is 0.
227   Data is either copied from the input file or decompressed from the input
228   file depending on state->how.  If state->how is LOOK, then a gzip header is
229   looked for to determine whether to copy or decompress.  Returns -1 on error,
230   otherwise 0.  gz_fetch() will leave state->how as COPY or GZIP unless the
231   end of the input file has been reached and all data has been processed.  */
232local int gz_fetch(state)
233    gz_statep state;
234{
235    z_streamp strm = &(state->strm);
236
237    do {
238        switch(state->how) {
239        case LOOK:      /* -> LOOK, COPY (only if never GZIP), or GZIP */
240            if (gz_look(state) == -1)
241                return -1;
242            if (state->how == LOOK)
243                return 0;
244            break;
245        case COPY:      /* -> COPY */
246            if (gz_load(state, state->out, state->size << 1, &(state->x.have))
247                    == -1)
248                return -1;
249            state->x.next = state->out;
250            return 0;
251        case GZIP:      /* -> GZIP or LOOK (if end of gzip stream) */
252            strm->avail_out = state->size << 1;
253            strm->next_out = state->out;
254            if (gz_decomp(state) == -1)
255                return -1;
256        }
257    } while (state->x.have == 0 && (!state->eof || strm->avail_in));
258    return 0;
259}
260
261/* Skip len uncompressed bytes of output.  Return -1 on error, 0 on success. */
262local int gz_skip(state, len)
263    gz_statep state;
264    z_off64_t len;
265{
266    unsigned n;
267
268    /* skip over len bytes or reach end-of-file, whichever comes first */
269    while (len)
270        /* skip over whatever is in output buffer */
271        if (state->x.have) {
272            n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ?
273                (unsigned)len : state->x.have;
274            state->x.have -= n;
275            state->x.next += n;
276            state->x.pos += n;
277            len -= n;
278        }
279
280        /* output buffer empty -- return if we're at the end of the input */
281        else if (state->eof && state->strm.avail_in == 0)
282            break;
283
284        /* need more data to skip -- load up output buffer */
285        else {
286            /* get more output, looking for header if required */
287            if (gz_fetch(state) == -1)
288                return -1;
289        }
290    return 0;
291}
292
/* Read len bytes into buf from file, or less than len up to the end of the
   input.  Return the number of bytes read.  If zero is returned, either the
   end of file was reached, or there was an error.  state->err must be
   consulted in that case to determine which.

   A pending seek request (state->seek) is carried out first by skipping
   state->skip bytes.  Note that every failure path returns 0, even when some
   bytes were already placed in buf earlier in the same call.  state->x.pos
   is advanced by the number of bytes delivered, and state->past is set when
   the caller asked for data beyond the end of the input. */
local z_size_t gz_read(state, buf, len)
    gz_statep state;
    voidp buf;
    z_size_t len;
{
    z_size_t got;
    unsigned n;

    /* if len is zero, avoid unnecessary operations */
    if (len == 0)
        return 0;

    /* process a skip request */
    if (state->seek) {
        state->seek = 0;
        if (gz_skip(state, state->skip) == -1)
            return 0;
    }

    /* get len bytes to buf, or less than len if at the end */
    got = 0;
    do {
        /* set n to the maximum amount of len that fits in an unsigned int
           (-1 converts to the largest unsigned value) */
        n = -1;
        if (n > len)
            n = len;

        /* first just try copying data from the output buffer */
        if (state->x.have) {
            if (state->x.have < n)
                n = state->x.have;
            memcpy(buf, state->x.next, n);
            state->x.next += n;
            state->x.have -= n;
        }

        /* output buffer empty -- return if we're at the end of the input */
        else if (state->eof && state->strm.avail_in == 0) {
            state->past = 1;        /* tried to read past end */
            break;
        }

        /* need output data -- for small len or new stream load up our output
           buffer */
        else if (state->how == LOOK || n < (state->size << 1)) {
            /* get more output, looking for header if required */
            if (gz_fetch(state) == -1)
                return 0;
            /* len is unchanged (and non-zero), so the loop test passes and
               the next iteration starts over at the copy branch */
            continue;       /* no progress yet -- go back to copy above */
            /* the copy above assures that we will leave with space in the
               output buffer, allowing at least one gzungetc() to succeed */
        }

        /* large len -- read directly into user buffer */
        else if (state->how == COPY) {      /* read directly */
            /* n is passed by value as the request size and is overwritten
               through &n with the number of bytes actually read */
            if (gz_load(state, (unsigned char *)buf, n, &n) == -1)
                return 0;
        }

        /* large len -- decompress directly into user buffer */
        else {  /* state->how == GZIP */
            state->strm.avail_out = n;
            state->strm.next_out = (unsigned char *)buf;
            if (gz_decomp(state) == -1)
                return 0;
            /* gz_decomp() reports the amount produced in x.have; that data
               already sits in the user buffer, so claim it and clear x.have */
            n = state->x.have;
            state->x.have = 0;
        }

        /* update progress */
        len -= n;
        buf = (char *)buf + n;
        got += n;
        state->x.pos += n;
    } while (len);

    /* return number of bytes read into user buffer */
    return got;
}
376
377/* -- see zlib.h -- */
378int ZEXPORT gzread(file, buf, len)
379    gzFile file;
380    voidp buf;
381    unsigned len;
382{
383    gz_statep state;
384
385    /* get internal structure */
386    if (file == NULL)
387        return -1;
388    state = (gz_statep)file;
389
390    /* check that we're reading and that there's no (serious) error */
391    if (state->mode != GZ_READ ||
392            (state->err != Z_OK && state->err != Z_BUF_ERROR))
393        return -1;
394
395    /* since an int is returned, make sure len fits in one, otherwise return
396       with an error (this avoids a flaw in the interface) */
397    if ((int)len < 0) {
398        gz_error(state, Z_STREAM_ERROR, "request does not fit in an int");
399        return -1;
400    }
401
402    /* read len or fewer bytes to buf */
403    len = gz_read(state, buf, len);
404
405    /* check for an error */
406    if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR)
407        return -1;
408
409    /* return the number of bytes read (this is assured to fit in an int) */
410    return (int)len;
411}
412
413/* -- see zlib.h -- */
414z_size_t ZEXPORT gzfread(buf, size, nitems, file)
415    voidp buf;
416    z_size_t size;
417    z_size_t nitems;
418    gzFile file;
419{
420    z_size_t len;
421    gz_statep state;
422
423    /* get internal structure */
424    if (file == NULL)
425        return 0;
426    state = (gz_statep)file;
427
428    /* check that we're reading and that there's no (serious) error */
429    if (state->mode != GZ_READ ||
430            (state->err != Z_OK && state->err != Z_BUF_ERROR))
431        return 0;
432
433    /* compute bytes to read -- error on overflow */
434    len = nitems * size;
435    if (size && len / size != nitems) {
436        gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
437        return 0;
438    }
439
440    /* read len or fewer bytes to buf, return the number of full items read */
441    return len ? gz_read(state, buf, len) / size : 0;
442}
443
444/* -- see zlib.h -- */
445#ifdef Z_PREFIX_SET
446#  undef z_gzgetc
447#else
448#  undef gzgetc
449#endif
450int ZEXPORT gzgetc(file)
451    gzFile file;
452{
453    int ret;
454    unsigned char buf[1];
455    gz_statep state;
456
457    /* get internal structure */
458    if (file == NULL)
459        return -1;
460    state = (gz_statep)file;
461
462    /* check that we're reading and that there's no (serious) error */
463    if (state->mode != GZ_READ ||
464        (state->err != Z_OK && state->err != Z_BUF_ERROR))
465        return -1;
466
467    /* try output buffer (no need to check for skip request) */
468    if (state->x.have) {
469        state->x.have--;
470        state->x.pos++;
471        return *(state->x.next)++;
472    }
473
474    /* nothing there -- try gz_read() */
475    ret = gz_read(state, buf, 1);
476    return ret < 1 ? -1 : buf[0];
477}
478
/* Alternate entry point that simply forwards to gzgetc() and returns its
   result unchanged. */
int ZEXPORT gzgetc_(file)
gzFile file;
{
    return gzgetc(file);
}
484
485/* -- see zlib.h -- */
486int ZEXPORT gzungetc(c, file)
487    int c;
488    gzFile file;
489{
490    gz_statep state;
491
492    /* get internal structure */
493    if (file == NULL)
494        return -1;
495    state = (gz_statep)file;
496
497    /* check that we're reading and that there's no (serious) error */
498    if (state->mode != GZ_READ ||
499        (state->err != Z_OK && state->err != Z_BUF_ERROR))
500        return -1;
501
502    /* process a skip request */
503    if (state->seek) {
504        state->seek = 0;
505        if (gz_skip(state, state->skip) == -1)
506            return -1;
507    }
508
509    /* can't push EOF */
510    if (c < 0)
511        return -1;
512
513    /* if output buffer empty, put byte at end (allows more pushing) */
514    if (state->x.have == 0) {
515        state->x.have = 1;
516        state->x.next = state->out + (state->size << 1) - 1;
517        state->x.next[0] = (unsigned char)c;
518        state->x.pos--;
519        state->past = 0;
520        return c;
521    }
522
523    /* if no room, give up (must have already done a gzungetc()) */
524    if (state->x.have == (state->size << 1)) {
525        gz_error(state, Z_DATA_ERROR, "out of room to push characters");
526        return -1;
527    }
528
529    /* slide output data if needed and insert byte before existing data */
530    if (state->x.next == state->out) {
531        unsigned char *src = state->out + state->x.have;
532        unsigned char *dest = state->out + (state->size << 1);
533        while (src > state->out)
534            *--dest = *--src;
535        state->x.next = dest;
536    }
537    state->x.have++;
538    state->x.next--;
539    state->x.next[0] = (unsigned char)c;
540    state->x.pos--;
541    state->past = 0;
542    return c;
543}
544
545/* -- see zlib.h -- */
546char * ZEXPORT gzgets(file, buf, len)
547    gzFile file;
548    char *buf;
549    int len;
550{
551    unsigned left, n;
552    char *str;
553    unsigned char *eol;
554    gz_statep state;
555
556    /* check parameters and get internal structure */
557    if (file == NULL || buf == NULL || len < 1)
558        return NULL;
559    state = (gz_statep)file;
560
561    /* check that we're reading and that there's no (serious) error */
562    if (state->mode != GZ_READ ||
563        (state->err != Z_OK && state->err != Z_BUF_ERROR))
564        return NULL;
565
566    /* process a skip request */
567    if (state->seek) {
568        state->seek = 0;
569        if (gz_skip(state, state->skip) == -1)
570            return NULL;
571    }
572
573    /* copy output bytes up to new line or len - 1, whichever comes first --
574       append a terminating zero to the string (we don't check for a zero in
575       the contents, let the user worry about that) */
576    str = buf;
577    left = (unsigned)len - 1;
578    if (left) do {
579        /* assure that something is in the output buffer */
580        if (state->x.have == 0 && gz_fetch(state) == -1)
581            return NULL;                /* error */
582        if (state->x.have == 0) {       /* end of file */
583            state->past = 1;            /* read past end */
584            break;                      /* return what we have */
585        }
586
587        /* look for end-of-line in current output buffer */
588        n = state->x.have > left ? left : state->x.have;
589        eol = (unsigned char *)memchr(state->x.next, '\n', n);
590        if (eol != NULL)
591            n = (unsigned)(eol - state->x.next) + 1;
592
593        /* copy through end-of-line, or remainder if not found */
594        memcpy(buf, state->x.next, n);
595        state->x.have -= n;
596        state->x.next += n;
597        state->x.pos += n;
598        left -= n;
599        buf += n;
600    } while (left && eol == NULL);
601
602    /* return terminated string, or if nothing, end of file */
603    if (buf == str)
604        return NULL;
605    buf[0] = 0;
606    return str;
607}
608
609/* -- see zlib.h -- */
610int ZEXPORT gzdirect(file)
611    gzFile file;
612{
613    gz_statep state;
614
615    /* get internal structure */
616    if (file == NULL)
617        return 0;
618    state = (gz_statep)file;
619
620    /* if the state is not known, but we can find out, then do so (this is
621       mainly for right after a gzopen() or gzdopen()) */
622    if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
623        (void)gz_look(state);
624
625    /* return 1 if transparent, 0 if processing a gzip stream */
626    return state->direct;
627}
628
629/* -- see zlib.h -- */
630int ZEXPORT gzclose_r(file)
631    gzFile file;
632{
633    int ret, err;
634    gz_statep state;
635
636    /* get internal structure */
637    if (file == NULL)
638        return Z_STREAM_ERROR;
639    state = (gz_statep)file;
640
641    /* check that we're reading */
642    if (state->mode != GZ_READ)
643        return Z_STREAM_ERROR;
644
645    /* free memory and close file */
646    if (state->size) {
647        inflateEnd(&(state->strm));
648        free(state->out);
649        free(state->in);
650    }
651    err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
652    gz_error(state, Z_OK, NULL);
653    free(state->path);
654    ret = close(state->fd);
655    free(state);
656    return ret ? Z_ERRNO : err;
657}
658