1/* gzread.c -- zlib functions for reading gzip files 2 * Copyright (C) 2004, 2005, 2010 Mark Adler 3 * For conditions of distribution and use, see copyright notice in zlib.h 4 */ 5 6#include "gzguts.h" 7 8/* Local functions */ 9local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *)); 10local int gz_avail OF((gz_statep)); 11local int gz_next4 OF((gz_statep, unsigned long *)); 12local int gz_head OF((gz_statep)); 13local int gz_decomp OF((gz_statep)); 14local int gz_make OF((gz_statep)); 15local int gz_skip OF((gz_statep, z_off64_t)); 16 17/* Use read() to load a buffer -- return -1 on error, otherwise 0. Read from 18 state->fd, and update state->eof, state->err, and state->msg as appropriate. 19 This function needs to loop on read(), since read() is not guaranteed to 20 read the number of bytes requested, depending on the type of descriptor. */ 21local int gz_load(state, buf, len, have) 22 gz_statep state; 23 unsigned char *buf; 24 unsigned len; 25 unsigned *have; 26{ 27 int ret; 28 29 *have = 0; 30 do { 31 ret = read(state->fd, buf + *have, len - *have); 32 if (ret <= 0) 33 break; 34 *have += ret; 35 } while (*have < len); 36 if (ret < 0) { 37 gz_error(state, Z_ERRNO, zstrerror()); 38 return -1; 39 } 40 if (ret == 0) 41 state->eof = 1; 42 return 0; 43} 44 45/* Load up input buffer and set eof flag if last data loaded -- return -1 on 46 error, 0 otherwise. Note that the eof flag is set when the end of the input 47 file is reached, even though there may be unused data in the buffer. Once 48 that data has been used, no more attempts will be made to read the file. 49 gz_avail() assumes that strm->avail_in == 0. */ 50local int gz_avail(state) 51 gz_statep state; 52{ 53 z_streamp strm = &(state->strm); 54 55 if (state->err != Z_OK) 56 return -1; 57 if (state->eof == 0) { 58 if (gz_load(state, state->in, state->size, 59 (unsigned *)&(strm->avail_in)) == -1) 60 return -1; 61 strm->next_in = state->in; 62 } 63 return 0; 64} 65 66/* Get next byte from input, or -1 if end or error. */ 67#define NEXT() ((strm->avail_in == 0 && gz_avail(state) == -1) ? -1 : \ 68 (strm->avail_in == 0 ? -1 : \ 69 (strm->avail_in--, *(strm->next_in)++))) 70 71/* Get a four-byte little-endian integer and return 0 on success and the value 72 in *ret. Otherwise -1 is returned and *ret is not modified. */ 73local int gz_next4(state, ret) 74 gz_statep state; 75 unsigned long *ret; 76{ 77 int ch; 78 unsigned long val; 79 z_streamp strm = &(state->strm); 80 81 val = NEXT(); 82 val += (unsigned)NEXT() << 8; 83 val += (unsigned long)NEXT() << 16; 84 ch = NEXT(); 85 if (ch == -1) 86 return -1; 87 val += (unsigned long)ch << 24; 88 *ret = val; 89 return 0; 90} 91 92/* Look for gzip header, set up for inflate or copy. state->have must be zero. 93 If this is the first time in, allocate required memory. state->how will be 94 left unchanged if there is no more input data available, will be set to COPY 95 if there is no gzip header and direct copying will be performed, or it will 96 be set to GZIP for decompression, and the gzip header will be skipped so 97 that the next available input data is the raw deflate stream. If direct 98 copying, then leftover input data from the input buffer will be copied to 99 the output buffer. In that case, all further file reads will be directly to 100 either the output buffer or a user buffer. If decompressing, the inflate 101 state and the check value will be initialized. gz_head() will return 0 on 102 success or -1 on failure. Failures may include read errors or gzip header 103 errors. */ 104local int gz_head(state) 105 gz_statep state; 106{ 107 z_streamp strm = &(state->strm); 108 int flags; 109 unsigned len; 110 111 /* allocate read buffers and inflate memory */ 112 if (state->size == 0) { 113 /* allocate buffers */ 114 state->in = malloc(state->want); 115 state->out = malloc(state->want << 1); 116 if (state->in == NULL || state->out == NULL) { 117 if (state->out != NULL) 118 free(state->out); 119 if (state->in != NULL) 120 free(state->in); 121 gz_error(state, Z_MEM_ERROR, "out of memory"); 122 return -1; 123 } 124 state->size = state->want; 125 126 /* allocate inflate memory */ 127 state->strm.zalloc = Z_NULL; 128 state->strm.zfree = Z_NULL; 129 state->strm.opaque = Z_NULL; 130 state->strm.avail_in = 0; 131 state->strm.next_in = Z_NULL; 132 if (inflateInit2(&(state->strm), -15) != Z_OK) { /* raw inflate */ 133 free(state->out); 134 free(state->in); 135 state->size = 0; 136 gz_error(state, Z_MEM_ERROR, "out of memory"); 137 return -1; 138 } 139 } 140 141 /* get some data in the input buffer */ 142 if (strm->avail_in == 0) { 143 if (gz_avail(state) == -1) 144 return -1; 145 if (strm->avail_in == 0) 146 return 0; 147 } 148 149 /* look for the gzip magic header bytes 31 and 139 */ 150 if (strm->next_in[0] == 31) { 151 strm->avail_in--; 152 strm->next_in++; 153 if (strm->avail_in == 0 && gz_avail(state) == -1) 154 return -1; 155 if (strm->avail_in && strm->next_in[0] == 139) { 156 157 /* add the following to avoid warning messages */ 158 unsigned long val; 159 160 /* we have a gzip header, woo hoo! */ 161 strm->avail_in--; 162 strm->next_in++; 163 164 /* skip rest of header */ 165 if (NEXT() != 8) { /* compression method */ 166 gz_error(state, Z_DATA_ERROR, "unknown compression method"); 167 return -1; 168 } 169 flags = NEXT(); 170 if (flags & 0xe0) { /* reserved flag bits */ 171 gz_error(state, Z_DATA_ERROR, "unknown header flags set"); 172 return -1; 173 } 174 val = NEXT(); /* modification time */ 175 val = NEXT(); 176 val = NEXT(); 177 val = NEXT(); 178 val = NEXT(); /* extra flags */ 179 val = NEXT(); /* operating system */ 180 if (flags & 4) { /* extra field */ 181 len = (unsigned)NEXT(); 182 len += (unsigned)NEXT() << 8; 183 while (len--) 184 if (NEXT() < 0) 185 break; 186 } 187 if (flags & 8) /* file name */ 188 while (NEXT() > 0) 189 ; 190 if (flags & 16) /* comment */ 191 while (NEXT() > 0) 192 ; 193 if (flags & 2) { /* header crc */ 194 val = NEXT(); 195 val = NEXT(); 196 } 197 /* an unexpected end of file is not checked for here -- it will be 198 noticed on the first request for uncompressed data */ 199 200 /* set up for decompression */ 201 inflateReset(strm); 202 strm->adler = crc32(0L, Z_NULL, 0); 203 state->how = GZIP; 204 state->direct = 0; 205 return 0; 206 } 207 else { 208 /* not a gzip file -- save first byte (31) and fall to raw i/o */ 209 state->out[0] = 31; 210 state->have = 1; 211 } 212 } 213 214 /* doing raw i/o, save start of raw data for seeking, copy any leftover 215 input to output -- this assumes that the output buffer is larger than 216 the input buffer, which also assures space for gzungetc() */ 217 state->raw = state->pos; 218 state->next = state->out; 219 if (strm->avail_in) { 220 memcpy(state->next + state->have, strm->next_in, strm->avail_in); 221 state->have += strm->avail_in; 222 strm->avail_in = 0; 223 } 224 state->how = COPY; 225 state->direct = 1; 226 return 0; 227} 228 229/* Decompress from input to the provided next_out and avail_out in the state. 230 If the end of the compressed data is reached, then verify the gzip trailer 231 check value and length (modulo 2^32). state->have and state->next are set 232 to point to the just decompressed data, and the crc is updated. If the 233 trailer is verified, state->how is reset to LOOK to look for the next gzip 234 stream or raw data, once state->have is depleted. Returns 0 on success, -1 235 on failure. Failures may include invalid compressed data or a failed gzip 236 trailer verification. */ 237local int gz_decomp(state) 238 gz_statep state; 239{ 240 int ret; 241 unsigned had; 242 unsigned long crc, len; 243 z_streamp strm = &(state->strm); 244 245 /* fill output buffer up to end of deflate stream */ 246 had = strm->avail_out; 247 do { 248 /* get more input for inflate() */ 249 if (strm->avail_in == 0 && gz_avail(state) == -1) 250 return -1; 251 if (strm->avail_in == 0) { 252 gz_error(state, Z_DATA_ERROR, "unexpected end of file"); 253 return -1; 254 } 255 256 /* decompress and handle errors */ 257 ret = inflate(strm, Z_NO_FLUSH); 258 if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) { 259 gz_error(state, Z_STREAM_ERROR, 260 "internal error: inflate stream corrupt"); 261 return -1; 262 } 263 if (ret == Z_MEM_ERROR) { 264 gz_error(state, Z_MEM_ERROR, "out of memory"); 265 return -1; 266 } 267 if (ret == Z_DATA_ERROR) { /* deflate stream invalid */ 268 gz_error(state, Z_DATA_ERROR, 269 strm->msg == NULL ? "compressed data error" : strm->msg); 270 return -1; 271 } 272 } while (strm->avail_out && ret != Z_STREAM_END); 273 274 /* update available output and crc check value */ 275 state->have = had - strm->avail_out; 276 state->next = strm->next_out - state->have; 277 strm->adler = crc32(strm->adler, state->next, state->have); 278 279 /* check gzip trailer if at end of deflate stream */ 280 if (ret == Z_STREAM_END) { 281 if (gz_next4(state, &crc) == -1 || gz_next4(state, &len) == -1) { 282 gz_error(state, Z_DATA_ERROR, "unexpected end of file"); 283 return -1; 284 } 285 if (crc != strm->adler) { 286 gz_error(state, Z_DATA_ERROR, "incorrect data check"); 287 return -1; 288 } 289 if (len != (strm->total_out & 0xffffffffL)) { 290 gz_error(state, Z_DATA_ERROR, "incorrect length check"); 291 return -1; 292 } 293 state->how = LOOK; /* ready for next stream, once have is 0 (leave 294 state->direct unchanged to remember how) */ 295 } 296 297 /* good decompression */ 298 return 0; 299} 300 301/* Make data and put in the output buffer. Assumes that state->have == 0. 302 Data is either copied from the input file or decompressed from the input 303 file depending on state->how. If state->how is LOOK, then a gzip header is 304 looked for (and skipped if found) to determine wither to copy or decompress. 305 Returns -1 on error, otherwise 0. gz_make() will leave state->have as COPY 306 or GZIP unless the end of the input file has been reached and all data has 307 been processed. */ 308local int gz_make(state) 309 gz_statep state; 310{ 311 z_streamp strm = &(state->strm); 312 313 if (state->how == LOOK) { /* look for gzip header */ 314 if (gz_head(state) == -1) 315 return -1; 316 if (state->have) /* got some data from gz_head() */ 317 return 0; 318 } 319 if (state->how == COPY) { /* straight copy */ 320 if (gz_load(state, state->out, state->size << 1, &(state->have)) == -1) 321 return -1; 322 state->next = state->out; 323 } 324 else if (state->how == GZIP) { /* decompress */ 325 strm->avail_out = state->size << 1; 326 strm->next_out = state->out; 327 if (gz_decomp(state) == -1) 328 return -1; 329 } 330 return 0; 331} 332 333/* Skip len uncompressed bytes of output. Return -1 on error, 0 on success. */ 334local int gz_skip(state, len) 335 gz_statep state; 336 z_off64_t len; 337{ 338 unsigned n; 339 340 /* skip over len bytes or reach end-of-file, whichever comes first */ 341 while (len) 342 /* skip over whatever is in output buffer */ 343 if (state->have) { 344 n = GT_OFF(state->have) || (z_off64_t)state->have > len ? 345 (unsigned)len : state->have; 346 state->have -= n; 347 state->next += n; 348 state->pos += n; 349 len -= n; 350 } 351 352 /* output buffer empty -- return if we're at the end of the input */ 353 else if (state->eof && state->strm.avail_in == 0) 354 break; 355 356 /* need more data to skip -- load up output buffer */ 357 else { 358 /* get more output, looking for header if required */ 359 if (gz_make(state) == -1) 360 return -1; 361 } 362 return 0; 363} 364 365/* -- see zlib.h -- */ 366int ZEXPORT gzread(file, buf, len) 367 gzFile file; 368 voidp buf; 369 unsigned len; 370{ 371 unsigned got, n; 372 gz_statep state; 373 z_streamp strm; 374 375 /* get internal structure */ 376 if (file == NULL) 377 return -1; 378 state = (gz_statep)file; 379 strm = &(state->strm); 380 381 /* check that we're reading and that there's no error */ 382 if (state->mode != GZ_READ || state->err != Z_OK) 383 return -1; 384 385 /* since an int is returned, make sure len fits in one, otherwise return 386 with an error (this avoids the flaw in the interface) */ 387 if ((int)len < 0) { 388 gz_error(state, Z_BUF_ERROR, "requested length does not fit in int"); 389 return -1; 390 } 391 392 /* if len is zero, avoid unnecessary operations */ 393 if (len == 0) 394 return 0; 395 396 /* process a skip request */ 397 if (state->seek) { 398 state->seek = 0; 399 if (gz_skip(state, state->skip) == -1) 400 return -1; 401 } 402 403 /* get len bytes to buf, or less than len if at the end */ 404 got = 0; 405 do { 406 /* first just try copying data from the output buffer */ 407 if (state->have) { 408 n = state->have > len ? len : state->have; 409 memcpy(buf, state->next, n); 410 state->next += n; 411 state->have -= n; 412 } 413 414 /* output buffer empty -- return if we're at the end of the input */ 415 else if (state->eof && strm->avail_in == 0) 416 break; 417 418 /* need output data -- for small len or new stream load up our output 419 buffer */ 420 else if (state->how == LOOK || len < (state->size << 1)) { 421 /* get more output, looking for header if required */ 422 if (gz_make(state) == -1) 423 return -1; 424 continue; /* no progress yet -- go back to memcpy() above */ 425 /* the copy above assures that we will leave with space in the 426 output buffer, allowing at least one gzungetc() to succeed */ 427 } 428 429 /* large len -- read directly into user buffer */ 430 else if (state->how == COPY) { /* read directly */ 431 if (gz_load(state, buf, len, &n) == -1) 432 return -1; 433 } 434 435 /* large len -- decompress directly into user buffer */ 436 else { /* state->how == GZIP */ 437 strm->avail_out = len; 438 strm->next_out = buf; 439 if (gz_decomp(state) == -1) 440 return -1; 441 n = state->have; 442 state->have = 0; 443 } 444 445 /* update progress */ 446 len -= n; 447 buf = (char *)buf + n; 448 got += n; 449 state->pos += n; 450 } while (len); 451 452 /* return number of bytes read into user buffer (will fit in int) */ 453 return (int)got; 454} 455 456/* -- see zlib.h -- */ 457int ZEXPORT gzgetc(file) 458 gzFile file; 459{ 460 int ret; 461 unsigned char buf[1]; 462 gz_statep state; 463 464 /* get internal structure */ 465 if (file == NULL) 466 return -1; 467 state = (gz_statep)file; 468 469 /* check that we're reading and that there's no error */ 470 if (state->mode != GZ_READ || state->err != Z_OK) 471 return -1; 472 473 /* try output buffer (no need to check for skip request) */ 474 if (state->have) { 475 state->have--; 476 state->pos++; 477 return *(state->next)++; 478 } 479 480 /* nothing there -- try gzread() */ 481 ret = gzread(file, buf, 1); 482 return ret < 1 ? -1 : buf[0]; 483} 484 485/* -- see zlib.h -- */ 486int ZEXPORT gzungetc(c, file) 487 int c; 488 gzFile file; 489{ 490 gz_statep state; 491 492 /* get internal structure */ 493 if (file == NULL) 494 return -1; 495 state = (gz_statep)file; 496 497 /* check that we're reading and that there's no error */ 498 if (state->mode != GZ_READ || state->err != Z_OK) 499 return -1; 500 501 /* process a skip request */ 502 if (state->seek) { 503 state->seek = 0; 504 if (gz_skip(state, state->skip) == -1) 505 return -1; 506 } 507 508 /* can't push EOF */ 509 if (c < 0) 510 return -1; 511 512 /* if output buffer empty, put byte at end (allows more pushing) */ 513 if (state->have == 0) { 514 state->have = 1; 515 state->next = state->out + (state->size << 1) - 1; 516 state->next[0] = c; 517 state->pos--; 518 return c; 519 } 520 521 /* if no room, give up (must have already done a gzungetc()) */ 522 if (state->have == (state->size << 1)) { 523 gz_error(state, Z_BUF_ERROR, "out of room to push characters"); 524 return -1; 525 } 526 527 /* slide output data if needed and insert byte before existing data */ 528 if (state->next == state->out) { 529 unsigned char *src = state->out + state->have; 530 unsigned char *dest = state->out + (state->size << 1); 531 while (src > state->out) 532 *--dest = *--src; 533 state->next = dest; 534 } 535 state->have++; 536 state->next--; 537 state->next[0] = c; 538 state->pos--; 539 return c; 540} 541 542/* -- see zlib.h -- */ 543char * ZEXPORT gzgets(file, buf, len) 544 gzFile file; 545 char *buf; 546 int len; 547{ 548 unsigned left, n; 549 char *str; 550 unsigned char *eol; 551 gz_statep state; 552 553 /* check parameters and get internal structure */ 554 if (file == NULL || buf == NULL || len < 1) 555 return NULL; 556 state = (gz_statep)file; 557 558 /* check that we're reading and that there's no error */ 559 if (state->mode != GZ_READ || state->err != Z_OK) 560 return NULL; 561 562 /* process a skip request */ 563 if (state->seek) { 564 state->seek = 0; 565 if (gz_skip(state, state->skip) == -1) 566 return NULL; 567 } 568 569 /* copy output bytes up to new line or len - 1, whichever comes first -- 570 append a terminating zero to the string (we don't check for a zero in 571 the contents, let the user worry about that) */ 572 str = buf; 573 left = (unsigned)len - 1; 574 if (left) do { 575 /* assure that something is in the output buffer */ 576 if (state->have == 0) { 577 if (gz_make(state) == -1) 578 return NULL; /* error */ 579 if (state->have == 0) { /* end of file */ 580 if (buf == str) /* got bupkus */ 581 return NULL; 582 break; /* got something -- return it */ 583 } 584 } 585 586 /* look for end-of-line in current output buffer */ 587 n = state->have > left ? left : state->have; 588 eol = memchr(state->next, '\n', n); 589 if (eol != NULL) 590 n = (unsigned)(eol - state->next) + 1; 591 592 /* copy through end-of-line, or remainder if not found */ 593 memcpy(buf, state->next, n); 594 state->have -= n; 595 state->next += n; 596 state->pos += n; 597 left -= n; 598 buf += n; 599 } while (left && eol == NULL); 600 601 /* found end-of-line or out of space -- terminate string and return it */ 602 buf[0] = 0; 603 return str; 604} 605 606/* -- see zlib.h -- */ 607int ZEXPORT gzdirect(file) 608 gzFile file; 609{ 610 gz_statep state; 611 612 /* get internal structure */ 613 if (file == NULL) 614 return 0; 615 state = (gz_statep)file; 616 617 /* check that we're reading */ 618 if (state->mode != GZ_READ) 619 return 0; 620 621 /* if the state is not known, but we can find out, then do so (this is 622 mainly for right after a gzopen() or gzdopen()) */ 623 if (state->how == LOOK && state->have == 0) 624 (void)gz_head(state); 625 626 /* return 1 if reading direct, 0 if decompressing a gzip stream */ 627 return state->direct; 628} 629 630/* -- see zlib.h -- */ 631int ZEXPORT gzclose_r(file) 632 gzFile file; 633{ 634 int ret; 635 gz_statep state; 636 637 /* get internal structure */ 638 if (file == NULL) 639 return Z_STREAM_ERROR; 640 state = (gz_statep)file; 641 642 /* check that we're reading */ 643 if (state->mode != GZ_READ) 644 return Z_STREAM_ERROR; 645 646 /* free memory and close file */ 647 if (state->size) { 648 inflateEnd(&(state->strm)); 649 free(state->out); 650 free(state->in); 651 } 652 gz_error(state, Z_OK, NULL); 653 free(state->path); 654 ret = close(state->fd); 655 free(state); 656 return ret ? Z_ERRNO : Z_OK; 657} 658