1/* gzread.c -- zlib functions for reading gzip files
2 * Copyright (C) 2004, 2005, 2010 Mark Adler
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 */
5
6#include "gzguts.h"
7
8/* Local functions */
9local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
10local int gz_avail OF((gz_statep));
11local int gz_next4 OF((gz_statep, unsigned long *));
12local int gz_head OF((gz_statep));
13local int gz_decomp OF((gz_statep));
14local int gz_make OF((gz_statep));
15local int gz_skip OF((gz_statep, z_off64_t));
16
17/* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
18   state->fd, and update state->eof, state->err, and state->msg as appropriate.
19   This function needs to loop on read(), since read() is not guaranteed to
20   read the number of bytes requested, depending on the type of descriptor. */
21local int gz_load(state, buf, len, have)
22    gz_statep state;
23    unsigned char *buf;
24    unsigned len;
25    unsigned *have;
26{
27    int ret;
28
29    *have = 0;
30    do {
31        ret = read(state->fd, buf + *have, len - *have);
32        if (ret <= 0)
33            break;
34        *have += ret;
35    } while (*have < len);
36    if (ret < 0) {
37        gz_error(state, Z_ERRNO, zstrerror());
38        return -1;
39    }
40    if (ret == 0)
41        state->eof = 1;
42    return 0;
43}
44
45/* Load up input buffer and set eof flag if last data loaded -- return -1 on
46   error, 0 otherwise.  Note that the eof flag is set when the end of the input
47   file is reached, even though there may be unused data in the buffer.  Once
48   that data has been used, no more attempts will be made to read the file.
49   gz_avail() assumes that strm->avail_in == 0. */
50local int gz_avail(state)
51    gz_statep state;
52{
53    z_streamp strm = &(state->strm);
54
55    if (state->err != Z_OK)
56        return -1;
57    if (state->eof == 0) {
58        if (gz_load(state, state->in, state->size,
59                (unsigned *)&(strm->avail_in)) == -1)
60            return -1;
61        strm->next_in = state->in;
62    }
63    return 0;
64}
65
/* Fetch the next input byte, refilling the input buffer with gz_avail() when
   it is empty.  Evaluates to -1 if the refill fails or leaves the buffer
   empty, otherwise to the byte value.  Expects `state` and `strm` to be in
   scope at the point of use. */
#define NEXT() (((strm->avail_in == 0 && gz_avail(state) == -1) || \
                 strm->avail_in == 0) ? -1 : \
                (strm->avail_in--, *(strm->next_in)++))
70
71/* Get a four-byte little-endian integer and return 0 on success and the value
72   in *ret.  Otherwise -1 is returned and *ret is not modified. */
73local int gz_next4(state, ret)
74    gz_statep state;
75    unsigned long *ret;
76{
77    int ch;
78    unsigned long val;
79    z_streamp strm = &(state->strm);
80
81    val = NEXT();
82    val += (unsigned)NEXT() << 8;
83    val += (unsigned long)NEXT() << 16;
84    ch = NEXT();
85    if (ch == -1)
86        return -1;
87    val += (unsigned long)ch << 24;
88    *ret = val;
89    return 0;
90}
91
92/* Look for gzip header, set up for inflate or copy.  state->have must be zero.
93   If this is the first time in, allocate required memory.  state->how will be
94   left unchanged if there is no more input data available, will be set to COPY
95   if there is no gzip header and direct copying will be performed, or it will
96   be set to GZIP for decompression, and the gzip header will be skipped so
97   that the next available input data is the raw deflate stream.  If direct
98   copying, then leftover input data from the input buffer will be copied to
99   the output buffer.  In that case, all further file reads will be directly to
100   either the output buffer or a user buffer.  If decompressing, the inflate
101   state and the check value will be initialized.  gz_head() will return 0 on
102   success or -1 on failure.  Failures may include read errors or gzip header
103   errors.  */
104local int gz_head(state)
105    gz_statep state;
106{
107    z_streamp strm = &(state->strm);
108    int flags;
109    unsigned len;
110
111    /* allocate read buffers and inflate memory */
112    if (state->size == 0) {
113        /* allocate buffers */
114        state->in = malloc(state->want);
115        state->out = malloc(state->want << 1);
116        if (state->in == NULL || state->out == NULL) {
117            if (state->out != NULL)
118                free(state->out);
119            if (state->in != NULL)
120                free(state->in);
121            gz_error(state, Z_MEM_ERROR, "out of memory");
122            return -1;
123        }
124        state->size = state->want;
125
126        /* allocate inflate memory */
127        state->strm.zalloc = Z_NULL;
128        state->strm.zfree = Z_NULL;
129        state->strm.opaque = Z_NULL;
130        state->strm.avail_in = 0;
131        state->strm.next_in = Z_NULL;
132        if (inflateInit2(&(state->strm), -15) != Z_OK) {    /* raw inflate */
133            free(state->out);
134            free(state->in);
135            state->size = 0;
136            gz_error(state, Z_MEM_ERROR, "out of memory");
137            return -1;
138        }
139    }
140
141    /* get some data in the input buffer */
142    if (strm->avail_in == 0) {
143        if (gz_avail(state) == -1)
144            return -1;
145        if (strm->avail_in == 0)
146            return 0;
147    }
148
149    /* look for the gzip magic header bytes 31 and 139 */
150    if (strm->next_in[0] == 31) {
151        strm->avail_in--;
152        strm->next_in++;
153        if (strm->avail_in == 0 && gz_avail(state) == -1)
154            return -1;
155        if (strm->avail_in && strm->next_in[0] == 139) {
156
157			/* add the following to avoid warning messages */
158    		unsigned long val;
159
160            /* we have a gzip header, woo hoo! */
161            strm->avail_in--;
162            strm->next_in++;
163
164            /* skip rest of header */
165            if (NEXT() != 8) {      /* compression method */
166                gz_error(state, Z_DATA_ERROR, "unknown compression method");
167                return -1;
168            }
169            flags = NEXT();
170            if (flags & 0xe0) {     /* reserved flag bits */
171                gz_error(state, Z_DATA_ERROR, "unknown header flags set");
172                return -1;
173            }
174            val = NEXT();                 /* modification time */
175            val = NEXT();
176            val = NEXT();
177            val = NEXT();
178            val = NEXT();                 /* extra flags */
179            val = NEXT();                 /* operating system */
180            if (flags & 4) {        /* extra field */
181                len = (unsigned)NEXT();
182                len += (unsigned)NEXT() << 8;
183                while (len--)
184                    if (NEXT() < 0)
185                        break;
186            }
187            if (flags & 8)          /* file name */
188                while (NEXT() > 0)
189                    ;
190            if (flags & 16)         /* comment */
191                while (NEXT() > 0)
192                    ;
193            if (flags & 2) {        /* header crc */
194                val = NEXT();
195                val = NEXT();
196            }
197            /* an unexpected end of file is not checked for here -- it will be
198               noticed on the first request for uncompressed data */
199
200            /* set up for decompression */
201            inflateReset(strm);
202            strm->adler = crc32(0L, Z_NULL, 0);
203            state->how = GZIP;
204            state->direct = 0;
205            return 0;
206        }
207        else {
208            /* not a gzip file -- save first byte (31) and fall to raw i/o */
209            state->out[0] = 31;
210            state->have = 1;
211        }
212    }
213
214    /* doing raw i/o, save start of raw data for seeking, copy any leftover
215       input to output -- this assumes that the output buffer is larger than
216       the input buffer, which also assures space for gzungetc() */
217    state->raw = state->pos;
218    state->next = state->out;
219    if (strm->avail_in) {
220        memcpy(state->next + state->have, strm->next_in, strm->avail_in);
221        state->have += strm->avail_in;
222        strm->avail_in = 0;
223    }
224    state->how = COPY;
225    state->direct = 1;
226    return 0;
227}
228
229/* Decompress from input to the provided next_out and avail_out in the state.
230   If the end of the compressed data is reached, then verify the gzip trailer
231   check value and length (modulo 2^32).  state->have and state->next are set
232   to point to the just decompressed data, and the crc is updated.  If the
233   trailer is verified, state->how is reset to LOOK to look for the next gzip
234   stream or raw data, once state->have is depleted.  Returns 0 on success, -1
235   on failure.  Failures may include invalid compressed data or a failed gzip
236   trailer verification. */
237local int gz_decomp(state)
238    gz_statep state;
239{
240    int ret;
241    unsigned had;
242    unsigned long crc, len;
243    z_streamp strm = &(state->strm);
244
245    /* fill output buffer up to end of deflate stream */
246    had = strm->avail_out;
247    do {
248        /* get more input for inflate() */
249        if (strm->avail_in == 0 && gz_avail(state) == -1)
250            return -1;
251        if (strm->avail_in == 0) {
252            gz_error(state, Z_DATA_ERROR, "unexpected end of file");
253            return -1;
254        }
255
256        /* decompress and handle errors */
257        ret = inflate(strm, Z_NO_FLUSH);
258        if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
259            gz_error(state, Z_STREAM_ERROR,
260                      "internal error: inflate stream corrupt");
261            return -1;
262        }
263        if (ret == Z_MEM_ERROR) {
264            gz_error(state, Z_MEM_ERROR, "out of memory");
265            return -1;
266        }
267        if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
268            gz_error(state, Z_DATA_ERROR,
269                      strm->msg == NULL ? "compressed data error" : strm->msg);
270            return -1;
271        }
272    } while (strm->avail_out && ret != Z_STREAM_END);
273
274    /* update available output and crc check value */
275    state->have = had - strm->avail_out;
276    state->next = strm->next_out - state->have;
277    strm->adler = crc32(strm->adler, state->next, state->have);
278
279    /* check gzip trailer if at end of deflate stream */
280    if (ret == Z_STREAM_END) {
281        if (gz_next4(state, &crc) == -1 || gz_next4(state, &len) == -1) {
282            gz_error(state, Z_DATA_ERROR, "unexpected end of file");
283            return -1;
284        }
285        if (crc != strm->adler) {
286            gz_error(state, Z_DATA_ERROR, "incorrect data check");
287            return -1;
288        }
289        if (len != (strm->total_out & 0xffffffffL)) {
290            gz_error(state, Z_DATA_ERROR, "incorrect length check");
291            return -1;
292        }
293        state->how = LOOK;      /* ready for next stream, once have is 0 (leave
294                                   state->direct unchanged to remember how) */
295    }
296
297    /* good decompression */
298    return 0;
299}
300
301/* Make data and put in the output buffer.  Assumes that state->have == 0.
302   Data is either copied from the input file or decompressed from the input
303   file depending on state->how.  If state->how is LOOK, then a gzip header is
304   looked for (and skipped if found) to determine wither to copy or decompress.
305   Returns -1 on error, otherwise 0.  gz_make() will leave state->have as COPY
306   or GZIP unless the end of the input file has been reached and all data has
307   been processed.  */
308local int gz_make(state)
309    gz_statep state;
310{
311    z_streamp strm = &(state->strm);
312
313    if (state->how == LOOK) {           /* look for gzip header */
314        if (gz_head(state) == -1)
315            return -1;
316        if (state->have)                /* got some data from gz_head() */
317            return 0;
318    }
319    if (state->how == COPY) {           /* straight copy */
320        if (gz_load(state, state->out, state->size << 1, &(state->have)) == -1)
321            return -1;
322        state->next = state->out;
323    }
324    else if (state->how == GZIP) {      /* decompress */
325        strm->avail_out = state->size << 1;
326        strm->next_out = state->out;
327        if (gz_decomp(state) == -1)
328            return -1;
329    }
330    return 0;
331}
332
333/* Skip len uncompressed bytes of output.  Return -1 on error, 0 on success. */
334local int gz_skip(state, len)
335    gz_statep state;
336    z_off64_t len;
337{
338    unsigned n;
339
340    /* skip over len bytes or reach end-of-file, whichever comes first */
341    while (len)
342        /* skip over whatever is in output buffer */
343        if (state->have) {
344            n = GT_OFF(state->have) || (z_off64_t)state->have > len ?
345                (unsigned)len : state->have;
346            state->have -= n;
347            state->next += n;
348            state->pos += n;
349            len -= n;
350        }
351
352        /* output buffer empty -- return if we're at the end of the input */
353        else if (state->eof && state->strm.avail_in == 0)
354            break;
355
356        /* need more data to skip -- load up output buffer */
357        else {
358            /* get more output, looking for header if required */
359            if (gz_make(state) == -1)
360                return -1;
361        }
362    return 0;
363}
364
365/* -- see zlib.h -- */
366int ZEXPORT gzread(file, buf, len)
367    gzFile file;
368    voidp buf;
369    unsigned len;
370{
371    unsigned got, n;
372    gz_statep state;
373    z_streamp strm;
374
375    /* get internal structure */
376    if (file == NULL)
377        return -1;
378    state = (gz_statep)file;
379    strm = &(state->strm);
380
381    /* check that we're reading and that there's no error */
382    if (state->mode != GZ_READ || state->err != Z_OK)
383        return -1;
384
385    /* since an int is returned, make sure len fits in one, otherwise return
386       with an error (this avoids the flaw in the interface) */
387    if ((int)len < 0) {
388        gz_error(state, Z_BUF_ERROR, "requested length does not fit in int");
389        return -1;
390    }
391
392    /* if len is zero, avoid unnecessary operations */
393    if (len == 0)
394        return 0;
395
396    /* process a skip request */
397    if (state->seek) {
398        state->seek = 0;
399        if (gz_skip(state, state->skip) == -1)
400            return -1;
401    }
402
403    /* get len bytes to buf, or less than len if at the end */
404    got = 0;
405    do {
406        /* first just try copying data from the output buffer */
407        if (state->have) {
408            n = state->have > len ? len : state->have;
409            memcpy(buf, state->next, n);
410            state->next += n;
411            state->have -= n;
412        }
413
414        /* output buffer empty -- return if we're at the end of the input */
415        else if (state->eof && strm->avail_in == 0)
416            break;
417
418        /* need output data -- for small len or new stream load up our output
419           buffer */
420        else if (state->how == LOOK || len < (state->size << 1)) {
421            /* get more output, looking for header if required */
422            if (gz_make(state) == -1)
423                return -1;
424            continue;       /* no progress yet -- go back to memcpy() above */
425            /* the copy above assures that we will leave with space in the
426               output buffer, allowing at least one gzungetc() to succeed */
427        }
428
429        /* large len -- read directly into user buffer */
430        else if (state->how == COPY) {      /* read directly */
431            if (gz_load(state, buf, len, &n) == -1)
432                return -1;
433        }
434
435        /* large len -- decompress directly into user buffer */
436        else {  /* state->how == GZIP */
437            strm->avail_out = len;
438            strm->next_out = buf;
439            if (gz_decomp(state) == -1)
440                return -1;
441            n = state->have;
442            state->have = 0;
443        }
444
445        /* update progress */
446        len -= n;
447        buf = (char *)buf + n;
448        got += n;
449        state->pos += n;
450    } while (len);
451
452    /* return number of bytes read into user buffer (will fit in int) */
453    return (int)got;
454}
455
456/* -- see zlib.h -- */
457int ZEXPORT gzgetc(file)
458    gzFile file;
459{
460    int ret;
461    unsigned char buf[1];
462    gz_statep state;
463
464    /* get internal structure */
465    if (file == NULL)
466        return -1;
467    state = (gz_statep)file;
468
469    /* check that we're reading and that there's no error */
470    if (state->mode != GZ_READ || state->err != Z_OK)
471        return -1;
472
473    /* try output buffer (no need to check for skip request) */
474    if (state->have) {
475        state->have--;
476        state->pos++;
477        return *(state->next)++;
478    }
479
480    /* nothing there -- try gzread() */
481    ret = gzread(file, buf, 1);
482    return ret < 1 ? -1 : buf[0];
483}
484
485/* -- see zlib.h -- */
486int ZEXPORT gzungetc(c, file)
487    int c;
488    gzFile file;
489{
490    gz_statep state;
491
492    /* get internal structure */
493    if (file == NULL)
494        return -1;
495    state = (gz_statep)file;
496
497    /* check that we're reading and that there's no error */
498    if (state->mode != GZ_READ || state->err != Z_OK)
499        return -1;
500
501    /* process a skip request */
502    if (state->seek) {
503        state->seek = 0;
504        if (gz_skip(state, state->skip) == -1)
505            return -1;
506    }
507
508    /* can't push EOF */
509    if (c < 0)
510        return -1;
511
512    /* if output buffer empty, put byte at end (allows more pushing) */
513    if (state->have == 0) {
514        state->have = 1;
515        state->next = state->out + (state->size << 1) - 1;
516        state->next[0] = c;
517        state->pos--;
518        return c;
519    }
520
521    /* if no room, give up (must have already done a gzungetc()) */
522    if (state->have == (state->size << 1)) {
523        gz_error(state, Z_BUF_ERROR, "out of room to push characters");
524        return -1;
525    }
526
527    /* slide output data if needed and insert byte before existing data */
528    if (state->next == state->out) {
529        unsigned char *src = state->out + state->have;
530        unsigned char *dest = state->out + (state->size << 1);
531        while (src > state->out)
532            *--dest = *--src;
533        state->next = dest;
534    }
535    state->have++;
536    state->next--;
537    state->next[0] = c;
538    state->pos--;
539    return c;
540}
541
542/* -- see zlib.h -- */
543char * ZEXPORT gzgets(file, buf, len)
544    gzFile file;
545    char *buf;
546    int len;
547{
548    unsigned left, n;
549    char *str;
550    unsigned char *eol;
551    gz_statep state;
552
553    /* check parameters and get internal structure */
554    if (file == NULL || buf == NULL || len < 1)
555        return NULL;
556    state = (gz_statep)file;
557
558    /* check that we're reading and that there's no error */
559    if (state->mode != GZ_READ || state->err != Z_OK)
560        return NULL;
561
562    /* process a skip request */
563    if (state->seek) {
564        state->seek = 0;
565        if (gz_skip(state, state->skip) == -1)
566            return NULL;
567    }
568
569    /* copy output bytes up to new line or len - 1, whichever comes first --
570       append a terminating zero to the string (we don't check for a zero in
571       the contents, let the user worry about that) */
572    str = buf;
573    left = (unsigned)len - 1;
574    if (left) do {
575        /* assure that something is in the output buffer */
576        if (state->have == 0) {
577            if (gz_make(state) == -1)
578                return NULL;            /* error */
579            if (state->have == 0) {     /* end of file */
580                if (buf == str)         /* got bupkus */
581                    return NULL;
582                break;                  /* got something -- return it */
583            }
584        }
585
586        /* look for end-of-line in current output buffer */
587        n = state->have > left ? left : state->have;
588        eol = memchr(state->next, '\n', n);
589        if (eol != NULL)
590            n = (unsigned)(eol - state->next) + 1;
591
592        /* copy through end-of-line, or remainder if not found */
593        memcpy(buf, state->next, n);
594        state->have -= n;
595        state->next += n;
596        state->pos += n;
597        left -= n;
598        buf += n;
599    } while (left && eol == NULL);
600
601    /* found end-of-line or out of space -- terminate string and return it */
602    buf[0] = 0;
603    return str;
604}
605
606/* -- see zlib.h -- */
607int ZEXPORT gzdirect(file)
608    gzFile file;
609{
610    gz_statep state;
611
612    /* get internal structure */
613    if (file == NULL)
614        return 0;
615    state = (gz_statep)file;
616
617    /* check that we're reading */
618    if (state->mode != GZ_READ)
619        return 0;
620
621    /* if the state is not known, but we can find out, then do so (this is
622       mainly for right after a gzopen() or gzdopen()) */
623    if (state->how == LOOK && state->have == 0)
624        (void)gz_head(state);
625
626    /* return 1 if reading direct, 0 if decompressing a gzip stream */
627    return state->direct;
628}
629
630/* -- see zlib.h -- */
631int ZEXPORT gzclose_r(file)
632    gzFile file;
633{
634    int ret;
635    gz_statep state;
636
637    /* get internal structure */
638    if (file == NULL)
639        return Z_STREAM_ERROR;
640    state = (gz_statep)file;
641
642    /* check that we're reading */
643    if (state->mode != GZ_READ)
644        return Z_STREAM_ERROR;
645
646    /* free memory and close file */
647    if (state->size) {
648        inflateEnd(&(state->strm));
649        free(state->out);
650        free(state->in);
651    }
652    gz_error(state, Z_OK, NULL);
653    free(state->path);
654    ret = close(state->fd);
655    free(state);
656    return ret ? Z_ERRNO : Z_OK;
657}
658