1/*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
3 *
4 * This code is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License version 2 only, as
6 * published by the Free Software Foundation.  Oracle designates this
7 * particular file as subject to the "Classpath" exception as provided
8 * by Oracle in the LICENSE file that accompanied this code.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 */
24
25/* gzread.c -- zlib functions for reading gzip files
26 * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler
27 * For conditions of distribution and use, see copyright notice in zlib.h
28 */
29
30#include "gzguts.h"
31
32/* Local functions */
33local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
34local int gz_avail OF((gz_statep));
35local int gz_look OF((gz_statep));
36local int gz_decomp OF((gz_statep));
37local int gz_fetch OF((gz_statep));
38local int gz_skip OF((gz_statep, z_off64_t));
39local z_size_t gz_read OF((gz_statep, voidp, z_size_t));
40
41/* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
42   state->fd, and update state->eof, state->err, and state->msg as appropriate.
43   This function needs to loop on read(), since read() is not guaranteed to
44   read the number of bytes requested, depending on the type of descriptor. */
45local int gz_load(state, buf, len, have)
46    gz_statep state;
47    unsigned char *buf;
48    unsigned len;
49    unsigned *have;
50{
51    int ret;
52    unsigned get, max = ((unsigned)-1 >> 2) + 1;
53
54    *have = 0;
55    do {
56        get = len - *have;
57        if (get > max)
58            get = max;
59        ret = read(state->fd, buf + *have, get);
60        if (ret <= 0)
61            break;
62        *have += (unsigned)ret;
63    } while (*have < len);
64    if (ret < 0) {
65        gz_error(state, Z_ERRNO, zstrerror());
66        return -1;
67    }
68    if (ret == 0)
69        state->eof = 1;
70    return 0;
71}
72
73/* Load up input buffer and set eof flag if last data loaded -- return -1 on
74   error, 0 otherwise.  Note that the eof flag is set when the end of the input
75   file is reached, even though there may be unused data in the buffer.  Once
76   that data has been used, no more attempts will be made to read the file.
77   If strm->avail_in != 0, then the current data is moved to the beginning of
78   the input buffer, and then the remainder of the buffer is loaded with the
79   available data from the input file. */
80local int gz_avail(state)
81    gz_statep state;
82{
83    unsigned got;
84    z_streamp strm = &(state->strm);
85
86    if (state->err != Z_OK && state->err != Z_BUF_ERROR)
87        return -1;
88    if (state->eof == 0) {
89        if (strm->avail_in) {       /* copy what's there to the start */
90            unsigned char *p = state->in;
91            unsigned const char *q = strm->next_in;
92            unsigned n = strm->avail_in;
93            do {
94                *p++ = *q++;
95            } while (--n);
96        }
97        if (gz_load(state, state->in + strm->avail_in,
98                    state->size - strm->avail_in, &got) == -1)
99            return -1;
100        strm->avail_in += got;
101        strm->next_in = state->in;
102    }
103    return 0;
104}
105
106/* Look for gzip header, set up for inflate or copy.  state->x.have must be 0.
107   If this is the first time in, allocate required memory.  state->how will be
108   left unchanged if there is no more input data available, will be set to COPY
109   if there is no gzip header and direct copying will be performed, or it will
110   be set to GZIP for decompression.  If direct copying, then leftover input
111   data from the input buffer will be copied to the output buffer.  In that
112   case, all further file reads will be directly to either the output buffer or
113   a user buffer.  If decompressing, the inflate state will be initialized.
114   gz_look() will return 0 on success or -1 on failure. */
115local int gz_look(state)
116    gz_statep state;
117{
118    z_streamp strm = &(state->strm);
119
120    /* allocate read buffers and inflate memory */
121    if (state->size == 0) {
122        /* allocate buffers */
123        state->in = (unsigned char *)malloc(state->want);
124        state->out = (unsigned char *)malloc(state->want << 1);
125        if (state->in == NULL || state->out == NULL) {
126            free(state->out);
127            free(state->in);
128            gz_error(state, Z_MEM_ERROR, "out of memory");
129            return -1;
130        }
131        state->size = state->want;
132
133        /* allocate inflate memory */
134        state->strm.zalloc = Z_NULL;
135        state->strm.zfree = Z_NULL;
136        state->strm.opaque = Z_NULL;
137        state->strm.avail_in = 0;
138        state->strm.next_in = Z_NULL;
139        if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) {    /* gunzip */
140            free(state->out);
141            free(state->in);
142            state->size = 0;
143            gz_error(state, Z_MEM_ERROR, "out of memory");
144            return -1;
145        }
146    }
147
148    /* get at least the magic bytes in the input buffer */
149    if (strm->avail_in < 2) {
150        if (gz_avail(state) == -1)
151            return -1;
152        if (strm->avail_in == 0)
153            return 0;
154    }
155
156    /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
157       a logical dilemma here when considering the case of a partially written
158       gzip file, to wit, if a single 31 byte is written, then we cannot tell
159       whether this is a single-byte file, or just a partially written gzip
160       file -- for here we assume that if a gzip file is being written, then
161       the header will be written in a single operation, so that reading a
162       single byte is sufficient indication that it is not a gzip file) */
163    if (strm->avail_in > 1 &&
164            strm->next_in[0] == 31 && strm->next_in[1] == 139) {
165        inflateReset(strm);
166        state->how = GZIP;
167        state->direct = 0;
168        return 0;
169    }
170
171    /* no gzip header -- if we were decoding gzip before, then this is trailing
172       garbage.  Ignore the trailing garbage and finish. */
173    if (state->direct == 0) {
174        strm->avail_in = 0;
175        state->eof = 1;
176        state->x.have = 0;
177        return 0;
178    }
179
180    /* doing raw i/o, copy any leftover input to output -- this assumes that
181       the output buffer is larger than the input buffer, which also assures
182       space for gzungetc() */
183    state->x.next = state->out;
184    if (strm->avail_in) {
185        memcpy(state->x.next, strm->next_in, strm->avail_in);
186        state->x.have = strm->avail_in;
187        strm->avail_in = 0;
188    }
189    state->how = COPY;
190    state->direct = 1;
191    return 0;
192}
193
194/* Decompress from input to the provided next_out and avail_out in the state.
195   On return, state->x.have and state->x.next point to the just decompressed
196   data.  If the gzip stream completes, state->how is reset to LOOK to look for
197   the next gzip stream or raw data, once state->x.have is depleted.  Returns 0
198   on success, -1 on failure. */
199local int gz_decomp(state)
200    gz_statep state;
201{
202    int ret = Z_OK;
203    unsigned had;
204    z_streamp strm = &(state->strm);
205
206    /* fill output buffer up to end of deflate stream */
207    had = strm->avail_out;
208    do {
209        /* get more input for inflate() */
210        if (strm->avail_in == 0 && gz_avail(state) == -1)
211            return -1;
212        if (strm->avail_in == 0) {
213            gz_error(state, Z_BUF_ERROR, "unexpected end of file");
214            break;
215        }
216
217        /* decompress and handle errors */
218        ret = inflate(strm, Z_NO_FLUSH);
219        if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
220            gz_error(state, Z_STREAM_ERROR,
221                     "internal error: inflate stream corrupt");
222            return -1;
223        }
224        if (ret == Z_MEM_ERROR) {
225            gz_error(state, Z_MEM_ERROR, "out of memory");
226            return -1;
227        }
228        if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
229            gz_error(state, Z_DATA_ERROR,
230                     strm->msg == NULL ? "compressed data error" : strm->msg);
231            return -1;
232        }
233    } while (strm->avail_out && ret != Z_STREAM_END);
234
235    /* update available output */
236    state->x.have = had - strm->avail_out;
237    state->x.next = strm->next_out - state->x.have;
238
239    /* if the gzip stream completed successfully, look for another */
240    if (ret == Z_STREAM_END)
241        state->how = LOOK;
242
243    /* good decompression */
244    return 0;
245}
246
247/* Fetch data and put it in the output buffer.  Assumes state->x.have is 0.
248   Data is either copied from the input file or decompressed from the input
249   file depending on state->how.  If state->how is LOOK, then a gzip header is
250   looked for to determine whether to copy or decompress.  Returns -1 on error,
251   otherwise 0.  gz_fetch() will leave state->how as COPY or GZIP unless the
252   end of the input file has been reached and all data has been processed.  */
253local int gz_fetch(state)
254    gz_statep state;
255{
256    z_streamp strm = &(state->strm);
257
258    do {
259        switch(state->how) {
260        case LOOK:      /* -> LOOK, COPY (only if never GZIP), or GZIP */
261            if (gz_look(state) == -1)
262                return -1;
263            if (state->how == LOOK)
264                return 0;
265            break;
266        case COPY:      /* -> COPY */
267            if (gz_load(state, state->out, state->size << 1, &(state->x.have))
268                    == -1)
269                return -1;
270            state->x.next = state->out;
271            return 0;
272        case GZIP:      /* -> GZIP or LOOK (if end of gzip stream) */
273            strm->avail_out = state->size << 1;
274            strm->next_out = state->out;
275            if (gz_decomp(state) == -1)
276                return -1;
277        }
278    } while (state->x.have == 0 && (!state->eof || strm->avail_in));
279    return 0;
280}
281
282/* Skip len uncompressed bytes of output.  Return -1 on error, 0 on success. */
283local int gz_skip(state, len)
284    gz_statep state;
285    z_off64_t len;
286{
287    unsigned n;
288
289    /* skip over len bytes or reach end-of-file, whichever comes first */
290    while (len)
291        /* skip over whatever is in output buffer */
292        if (state->x.have) {
293            n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ?
294                (unsigned)len : state->x.have;
295            state->x.have -= n;
296            state->x.next += n;
297            state->x.pos += n;
298            len -= n;
299        }
300
301        /* output buffer empty -- return if we're at the end of the input */
302        else if (state->eof && state->strm.avail_in == 0)
303            break;
304
305        /* need more data to skip -- load up output buffer */
306        else {
307            /* get more output, looking for header if required */
308            if (gz_fetch(state) == -1)
309                return -1;
310        }
311    return 0;
312}
313
314/* Read len bytes into buf from file, or less than len up to the end of the
315   input.  Return the number of bytes read.  If zero is returned, either the
316   end of file was reached, or there was an error.  state->err must be
317   consulted in that case to determine which. */
318local z_size_t gz_read(state, buf, len)
319    gz_statep state;
320    voidp buf;
321    z_size_t len;
322{
323    z_size_t got;
324    unsigned n;
325
326    /* if len is zero, avoid unnecessary operations */
327    if (len == 0)
328        return 0;
329
330    /* process a skip request */
331    if (state->seek) {
332        state->seek = 0;
333        if (gz_skip(state, state->skip) == -1)
334            return 0;
335    }
336
337    /* get len bytes to buf, or less than len if at the end */
338    got = 0;
339    do {
340        /* set n to the maximum amount of len that fits in an unsigned int */
341        n = -1;
342        if (n > len)
343            n = (unsigned)len;
344
345        /* first just try copying data from the output buffer */
346        if (state->x.have) {
347            if (state->x.have < n)
348                n = state->x.have;
349            memcpy(buf, state->x.next, n);
350            state->x.next += n;
351            state->x.have -= n;
352        }
353
354        /* output buffer empty -- return if we're at the end of the input */
355        else if (state->eof && state->strm.avail_in == 0) {
356            state->past = 1;        /* tried to read past end */
357            break;
358        }
359
360        /* need output data -- for small len or new stream load up our output
361           buffer */
362        else if (state->how == LOOK || n < (state->size << 1)) {
363            /* get more output, looking for header if required */
364            if (gz_fetch(state) == -1)
365                return 0;
366            continue;       /* no progress yet -- go back to copy above */
367            /* the copy above assures that we will leave with space in the
368               output buffer, allowing at least one gzungetc() to succeed */
369        }
370
371        /* large len -- read directly into user buffer */
372        else if (state->how == COPY) {      /* read directly */
373            if (gz_load(state, (unsigned char *)buf, n, &n) == -1)
374                return 0;
375        }
376
377        /* large len -- decompress directly into user buffer */
378        else {  /* state->how == GZIP */
379            state->strm.avail_out = n;
380            state->strm.next_out = (unsigned char *)buf;
381            if (gz_decomp(state) == -1)
382                return 0;
383            n = state->x.have;
384            state->x.have = 0;
385        }
386
387        /* update progress */
388        len -= n;
389        buf = (char *)buf + n;
390        got += n;
391        state->x.pos += n;
392    } while (len);
393
394    /* return number of bytes read into user buffer */
395    return got;
396}
397
398/* -- see zlib.h -- */
399int ZEXPORT gzread(file, buf, len)
400    gzFile file;
401    voidp buf;
402    unsigned len;
403{
404    gz_statep state;
405
406    /* get internal structure */
407    if (file == NULL)
408        return -1;
409    state = (gz_statep)file;
410
411    /* check that we're reading and that there's no (serious) error */
412    if (state->mode != GZ_READ ||
413            (state->err != Z_OK && state->err != Z_BUF_ERROR))
414        return -1;
415
416    /* since an int is returned, make sure len fits in one, otherwise return
417       with an error (this avoids a flaw in the interface) */
418    if ((int)len < 0) {
419        gz_error(state, Z_STREAM_ERROR, "request does not fit in an int");
420        return -1;
421    }
422
423    /* read len or fewer bytes to buf */
424    len = (unsigned)gz_read(state, buf, len);
425
426    /* check for an error */
427    if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR)
428        return -1;
429
430    /* return the number of bytes read (this is assured to fit in an int) */
431    return (int)len;
432}
433
434/* -- see zlib.h -- */
435z_size_t ZEXPORT gzfread(buf, size, nitems, file)
436    voidp buf;
437    z_size_t size;
438    z_size_t nitems;
439    gzFile file;
440{
441    z_size_t len;
442    gz_statep state;
443
444    /* get internal structure */
445    if (file == NULL)
446        return 0;
447    state = (gz_statep)file;
448
449    /* check that we're reading and that there's no (serious) error */
450    if (state->mode != GZ_READ ||
451            (state->err != Z_OK && state->err != Z_BUF_ERROR))
452        return 0;
453
454    /* compute bytes to read -- error on overflow */
455    len = nitems * size;
456    if (size && len / size != nitems) {
457        gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
458        return 0;
459    }
460
461    /* read len or fewer bytes to buf, return the number of full items read */
462    return len ? gz_read(state, buf, len) / size : 0;
463}
464
465/* -- see zlib.h -- */
466#ifdef Z_PREFIX_SET
467#  undef z_gzgetc
468#else
469#  undef gzgetc
470#endif
471int ZEXPORT gzgetc(file)
472    gzFile file;
473{
474    int ret;
475    unsigned char buf[1];
476    gz_statep state;
477
478    /* get internal structure */
479    if (file == NULL)
480        return -1;
481    state = (gz_statep)file;
482
483    /* check that we're reading and that there's no (serious) error */
484    if (state->mode != GZ_READ ||
485        (state->err != Z_OK && state->err != Z_BUF_ERROR))
486        return -1;
487
488    /* try output buffer (no need to check for skip request) */
489    if (state->x.have) {
490        state->x.have--;
491        state->x.pos++;
492        return *(state->x.next)++;
493    }
494
495    /* nothing there -- try gz_read() */
496    ret = (int)gz_read(state, buf, 1);
497    return ret < 1 ? -1 : buf[0];
498}
499
500int ZEXPORT gzgetc_(file)
501gzFile file;
502{
503    return gzgetc(file);
504}
505
506/* -- see zlib.h -- */
507int ZEXPORT gzungetc(c, file)
508    int c;
509    gzFile file;
510{
511    gz_statep state;
512
513    /* get internal structure */
514    if (file == NULL)
515        return -1;
516    state = (gz_statep)file;
517
518    /* check that we're reading and that there's no (serious) error */
519    if (state->mode != GZ_READ ||
520        (state->err != Z_OK && state->err != Z_BUF_ERROR))
521        return -1;
522
523    /* process a skip request */
524    if (state->seek) {
525        state->seek = 0;
526        if (gz_skip(state, state->skip) == -1)
527            return -1;
528    }
529
530    /* can't push EOF */
531    if (c < 0)
532        return -1;
533
534    /* if output buffer empty, put byte at end (allows more pushing) */
535    if (state->x.have == 0) {
536        state->x.have = 1;
537        state->x.next = state->out + (state->size << 1) - 1;
538        state->x.next[0] = (unsigned char)c;
539        state->x.pos--;
540        state->past = 0;
541        return c;
542    }
543
544    /* if no room, give up (must have already done a gzungetc()) */
545    if (state->x.have == (state->size << 1)) {
546        gz_error(state, Z_DATA_ERROR, "out of room to push characters");
547        return -1;
548    }
549
550    /* slide output data if needed and insert byte before existing data */
551    if (state->x.next == state->out) {
552        unsigned char *src = state->out + state->x.have;
553        unsigned char *dest = state->out + (state->size << 1);
554        while (src > state->out)
555            *--dest = *--src;
556        state->x.next = dest;
557    }
558    state->x.have++;
559    state->x.next--;
560    state->x.next[0] = (unsigned char)c;
561    state->x.pos--;
562    state->past = 0;
563    return c;
564}
565
566/* -- see zlib.h -- */
567char * ZEXPORT gzgets(file, buf, len)
568    gzFile file;
569    char *buf;
570    int len;
571{
572    unsigned left, n;
573    char *str;
574    unsigned char *eol;
575    gz_statep state;
576
577    /* check parameters and get internal structure */
578    if (file == NULL || buf == NULL || len < 1)
579        return NULL;
580    state = (gz_statep)file;
581
582    /* check that we're reading and that there's no (serious) error */
583    if (state->mode != GZ_READ ||
584        (state->err != Z_OK && state->err != Z_BUF_ERROR))
585        return NULL;
586
587    /* process a skip request */
588    if (state->seek) {
589        state->seek = 0;
590        if (gz_skip(state, state->skip) == -1)
591            return NULL;
592    }
593
594    /* copy output bytes up to new line or len - 1, whichever comes first --
595       append a terminating zero to the string (we don't check for a zero in
596       the contents, let the user worry about that) */
597    str = buf;
598    left = (unsigned)len - 1;
599    if (left) do {
600        /* assure that something is in the output buffer */
601        if (state->x.have == 0 && gz_fetch(state) == -1)
602            return NULL;                /* error */
603        if (state->x.have == 0) {       /* end of file */
604            state->past = 1;            /* read past end */
605            break;                      /* return what we have */
606        }
607
608        /* look for end-of-line in current output buffer */
609        n = state->x.have > left ? left : state->x.have;
610        eol = (unsigned char *)memchr(state->x.next, '\n', n);
611        if (eol != NULL)
612            n = (unsigned)(eol - state->x.next) + 1;
613
614        /* copy through end-of-line, or remainder if not found */
615        memcpy(buf, state->x.next, n);
616        state->x.have -= n;
617        state->x.next += n;
618        state->x.pos += n;
619        left -= n;
620        buf += n;
621    } while (left && eol == NULL);
622
623    /* return terminated string, or if nothing, end of file */
624    if (buf == str)
625        return NULL;
626    buf[0] = 0;
627    return str;
628}
629
630/* -- see zlib.h -- */
631int ZEXPORT gzdirect(file)
632    gzFile file;
633{
634    gz_statep state;
635
636    /* get internal structure */
637    if (file == NULL)
638        return 0;
639    state = (gz_statep)file;
640
641    /* if the state is not known, but we can find out, then do so (this is
642       mainly for right after a gzopen() or gzdopen()) */
643    if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
644        (void)gz_look(state);
645
646    /* return 1 if transparent, 0 if processing a gzip stream */
647    return state->direct;
648}
649
650/* -- see zlib.h -- */
651int ZEXPORT gzclose_r(file)
652    gzFile file;
653{
654    int ret, err;
655    gz_statep state;
656
657    /* get internal structure */
658    if (file == NULL)
659        return Z_STREAM_ERROR;
660    state = (gz_statep)file;
661
662    /* check that we're reading */
663    if (state->mode != GZ_READ)
664        return Z_STREAM_ERROR;
665
666    /* free memory and close file */
667    if (state->size) {
668        inflateEnd(&(state->strm));
669        free(state->out);
670        free(state->in);
671    }
672    err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
673    gz_error(state, Z_OK, NULL);
674    free(state->path);
675    ret = close(state->fd);
676    free(state);
677    return ret ? Z_ERRNO : err;
678}
679