1228753Smm/*- 2228753Smm * Copyright (c) 2003-2007 Tim Kientzle 3228753Smm * All rights reserved. 4228753Smm * 5228753Smm * Redistribution and use in source and binary forms, with or without 6228753Smm * modification, are permitted provided that the following conditions 7228753Smm * are met: 8228753Smm * 1. Redistributions of source code must retain the above copyright 9228753Smm * notice, this list of conditions and the following disclaimer. 10228753Smm * 2. Redistributions in binary form must reproduce the above copyright 11228753Smm * notice, this list of conditions and the following disclaimer in the 12228753Smm * documentation and/or other materials provided with the distribution. 13228753Smm * 14228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24228753Smm */ 25228753Smm 26228753Smm#include "archive_platform.h" 27228753Smm 28231200Smm__FBSDID("$FreeBSD$"); 29228753Smm 30228753Smm 31228753Smm#ifdef HAVE_ERRNO_H 32228753Smm#include <errno.h> 33228753Smm#endif 34228753Smm#ifdef HAVE_STDLIB_H 35228753Smm#include <stdlib.h> 36228753Smm#endif 37228753Smm#ifdef HAVE_STRING_H 38228753Smm#include <string.h> 39228753Smm#endif 40348607Smm#ifdef HAVE_LIMITS_H 41348607Smm#include <limits.h> 42348607Smm#endif 43228753Smm#ifdef HAVE_UNISTD_H 44228753Smm#include <unistd.h> 45228753Smm#endif 46228753Smm#ifdef HAVE_ZLIB_H 47228753Smm#include <zlib.h> 48228753Smm#endif 49228753Smm 50228753Smm#include "archive.h" 51348607Smm#include "archive_entry.h" 52348607Smm#include "archive_endian.h" 53228753Smm#include "archive_private.h" 54228753Smm#include "archive_read_private.h" 55228753Smm 56228753Smm#ifdef HAVE_ZLIB_H 57228753Smmstruct private_data { 58228753Smm z_stream stream; 59228753Smm char in_stream; 60228753Smm unsigned char *out_block; 61228753Smm size_t out_block_size; 62228753Smm int64_t total_out; 63228753Smm unsigned long crc; 64348607Smm uint32_t mtime; 65348607Smm char *name; 66228753Smm char eof; /* True = found end of compressed data. */ 67228753Smm}; 68228753Smm 69228753Smm/* Gzip Filter. */ 70228753Smmstatic ssize_t gzip_filter_read(struct archive_read_filter *, const void **); 71228753Smmstatic int gzip_filter_close(struct archive_read_filter *); 72228753Smm#endif 73228753Smm 74228753Smm/* 75228753Smm * Note that we can detect gzip archives even if we can't decompress 76228753Smm * them. (In fact, we like detecting them because we can give better 77228753Smm * error messages.) So the bid framework here gets compiled even 78228753Smm * if zlib is unavailable. 79228753Smm * 80228753Smm * TODO: If zlib is unavailable, gzip_bidder_init() should 81228753Smm * use the compress_program framework to try to fire up an external 82248616Smm * gzip program. 83228753Smm */ 84228753Smmstatic int gzip_bidder_bid(struct archive_read_filter_bidder *, 85228753Smm struct archive_read_filter *); 86228753Smmstatic int gzip_bidder_init(struct archive_read_filter *); 87228753Smm 88231200Smm#if ARCHIVE_VERSION_NUMBER < 4000000 89231200Smm/* Deprecated; remove in libarchive 4.0 */ 90228753Smmint 91231200Smmarchive_read_support_compression_gzip(struct archive *a) 92228753Smm{ 93231200Smm return archive_read_support_filter_gzip(a); 94231200Smm} 95231200Smm#endif 96231200Smm 97231200Smmint 98231200Smmarchive_read_support_filter_gzip(struct archive *_a) 99231200Smm{ 100228753Smm struct archive_read *a = (struct archive_read *)_a; 101231200Smm struct archive_read_filter_bidder *bidder; 102228753Smm 103231200Smm archive_check_magic(_a, ARCHIVE_READ_MAGIC, 104231200Smm ARCHIVE_STATE_NEW, "archive_read_support_filter_gzip"); 105231200Smm 106231200Smm if (__archive_read_get_bidder(a, &bidder) != ARCHIVE_OK) 107228753Smm return (ARCHIVE_FATAL); 108228753Smm 109228753Smm bidder->data = NULL; 110248616Smm bidder->name = "gzip"; 111228753Smm bidder->bid = gzip_bidder_bid; 112228753Smm bidder->init = gzip_bidder_init; 113228753Smm bidder->options = NULL; 114228753Smm bidder->free = NULL; /* No data, so no cleanup necessary. */ 115228753Smm /* Signal the extent of gzip support with the return value here. */ 116228753Smm#if HAVE_ZLIB_H 117228753Smm return (ARCHIVE_OK); 118228753Smm#else 119228753Smm archive_set_error(_a, ARCHIVE_ERRNO_MISC, 120248616Smm "Using external gzip program"); 121228753Smm return (ARCHIVE_WARN); 122228753Smm#endif 123228753Smm} 124228753Smm 125228753Smm/* 126228753Smm * Read and verify the header. 127228753Smm * 128228753Smm * Returns zero if the header couldn't be validated, else returns 129228753Smm * number of bytes in header. If pbits is non-NULL, it receives a 130228753Smm * count of bits verified, suitable for use by bidder. 131228753Smm */ 132248616Smmstatic ssize_t 133348607Smmpeek_at_header(struct archive_read_filter *filter, int *pbits, 134353376Smm#ifdef HAVE_ZLIB_H 135353376Smm struct private_data *state 136353376Smm#else 137353376Smm void *state 138353376Smm#endif 139353376Smm ) 140228753Smm{ 141228753Smm const unsigned char *p; 142228753Smm ssize_t avail, len; 143228753Smm int bits = 0; 144228753Smm int header_flags; 145353376Smm#ifndef HAVE_ZLIB_H 146353376Smm (void)state; /* UNUSED */ 147353376Smm#endif 148228753Smm 149228753Smm /* Start by looking at the first ten bytes of the header, which 150228753Smm * is all fixed layout. */ 151228753Smm len = 10; 152228753Smm p = __archive_read_filter_ahead(filter, len, &avail); 153228753Smm if (p == NULL || avail == 0) 154228753Smm return (0); 155231200Smm /* We only support deflation- third byte must be 0x08. */ 156231200Smm if (memcmp(p, "\x1F\x8B\x08", 3) != 0) 157228753Smm return (0); 158231200Smm bits += 24; 159228753Smm if ((p[3] & 0xE0)!= 0) /* No reserved flags set. */ 160228753Smm return (0); 161228753Smm bits += 3; 162228753Smm header_flags = p[3]; 163348607Smm /* Bytes 4-7 are mod time in little endian. */ 164353376Smm#ifdef HAVE_ZLIB_H 165348607Smm if (state) 166348607Smm state->mtime = archive_le32dec(p + 4); 167353376Smm#endif 168228753Smm /* Byte 8 is deflate flags. */ 169228753Smm /* XXXX TODO: return deflate flags back to consume_header for use 170228753Smm in initializing the decompressor. */ 171228753Smm /* Byte 9 is OS. */ 172228753Smm 173228753Smm /* Optional extra data: 2 byte length plus variable body. */ 174228753Smm if (header_flags & 4) { 175228753Smm p = __archive_read_filter_ahead(filter, len + 2, &avail); 176228753Smm if (p == NULL) 177228753Smm return (0); 178228753Smm len += ((int)p[len + 1] << 8) | (int)p[len]; 179228753Smm len += 2; 180228753Smm } 181228753Smm 182228753Smm /* Null-terminated optional filename. */ 183228753Smm if (header_flags & 8) { 184353376Smm#ifdef HAVE_ZLIB_H 185348607Smm ssize_t file_start = len; 186353376Smm#endif 187228753Smm do { 188228753Smm ++len; 189228753Smm if (avail < len) 190228753Smm p = __archive_read_filter_ahead(filter, 191228753Smm len, &avail); 192228753Smm if (p == NULL) 193228753Smm return (0); 194228753Smm } while (p[len - 1] != 0); 195348607Smm 196353376Smm#ifdef HAVE_ZLIB_H 197348607Smm if (state) { 198348607Smm /* Reset the name in case of repeat header reads. */ 199348607Smm free(state->name); 200348607Smm state->name = strdup((const char *)&p[file_start]); 201348607Smm } 202353376Smm#endif 203228753Smm } 204228753Smm 205228753Smm /* Null-terminated optional comment. */ 206228753Smm if (header_flags & 16) { 207228753Smm do { 208228753Smm ++len; 209228753Smm if (avail < len) 210228753Smm p = __archive_read_filter_ahead(filter, 211228753Smm len, &avail); 212228753Smm if (p == NULL) 213228753Smm return (0); 214228753Smm } while (p[len - 1] != 0); 215228753Smm } 216228753Smm 217228753Smm /* Optional header CRC */ 218228753Smm if ((header_flags & 2)) { 219228753Smm p = __archive_read_filter_ahead(filter, len + 2, &avail); 220228753Smm if (p == NULL) 221228753Smm return (0); 222228753Smm#if 0 223228753Smm int hcrc = ((int)p[len + 1] << 8) | (int)p[len]; 224228753Smm int crc = /* XXX TODO: Compute header CRC. */; 225228753Smm if (crc != hcrc) 226228753Smm return (0); 227228753Smm bits += 16; 228228753Smm#endif 229228753Smm len += 2; 230228753Smm } 231228753Smm 232228753Smm if (pbits != NULL) 233228753Smm *pbits = bits; 234228753Smm return (len); 235228753Smm} 236228753Smm 237228753Smm/* 238228753Smm * Bidder just verifies the header and returns the number of verified bits. 239228753Smm */ 240228753Smmstatic int 241228753Smmgzip_bidder_bid(struct archive_read_filter_bidder *self, 242228753Smm struct archive_read_filter *filter) 243228753Smm{ 244228753Smm int bits_checked; 245228753Smm 246228753Smm (void)self; /* UNUSED */ 247228753Smm 248348607Smm if (peek_at_header(filter, &bits_checked, NULL)) 249228753Smm return (bits_checked); 250228753Smm return (0); 251228753Smm} 252228753Smm 253228753Smm#ifndef HAVE_ZLIB_H 254228753Smm 255228753Smm/* 256228753Smm * If we don't have the library on this system, we can't do the 257248616Smm * decompression directly. We can, however, try to run "gzip -d" 258228753Smm * in case that's available. 259228753Smm */ 260228753Smmstatic int 261228753Smmgzip_bidder_init(struct archive_read_filter *self) 262228753Smm{ 263228753Smm int r; 264228753Smm 265248616Smm r = __archive_read_program(self, "gzip -d"); 266228753Smm /* Note: We set the format here even if __archive_read_program() 267228753Smm * above fails. We do, after all, know what the format is 268228753Smm * even if we weren't able to read it. */ 269248616Smm self->code = ARCHIVE_FILTER_GZIP; 270228753Smm self->name = "gzip"; 271228753Smm return (r); 272228753Smm} 273228753Smm 274228753Smm#else 275228753Smm 276353376Smmstatic int 277353376Smmgzip_read_header(struct archive_read_filter *self, struct archive_entry *entry) 278353376Smm{ 279353376Smm struct private_data *state; 280353376Smm 281353376Smm state = (struct private_data *)self->data; 282353376Smm 283353376Smm /* A mtime of 0 is considered invalid/missing. */ 284353376Smm if (state->mtime != 0) 285353376Smm archive_entry_set_mtime(entry, state->mtime, 0); 286353376Smm 287353376Smm /* If the name is available, extract it. */ 288353376Smm if (state->name) 289353376Smm archive_entry_set_pathname(entry, state->name); 290353376Smm 291353376Smm return (ARCHIVE_OK); 292353376Smm} 293353376Smm 294228753Smm/* 295228753Smm * Initialize the filter object. 296228753Smm */ 297228753Smmstatic int 298228753Smmgzip_bidder_init(struct archive_read_filter *self) 299228753Smm{ 300228753Smm struct private_data *state; 301228753Smm static const size_t out_block_size = 64 * 1024; 302228753Smm void *out_block; 303228753Smm 304248616Smm self->code = ARCHIVE_FILTER_GZIP; 305228753Smm self->name = "gzip"; 306228753Smm 307228753Smm state = (struct private_data *)calloc(sizeof(*state), 1); 308228753Smm out_block = (unsigned char *)malloc(out_block_size); 309228753Smm if (state == NULL || out_block == NULL) { 310228753Smm free(out_block); 311228753Smm free(state); 312228753Smm archive_set_error(&self->archive->archive, ENOMEM, 313228753Smm "Can't allocate data for gzip decompression"); 314228753Smm return (ARCHIVE_FATAL); 315228753Smm } 316228753Smm 317228753Smm self->data = state; 318228753Smm state->out_block_size = out_block_size; 319228753Smm state->out_block = out_block; 320228753Smm self->read = gzip_filter_read; 321228753Smm self->skip = NULL; /* not supported */ 322228753Smm self->close = gzip_filter_close; 323353376Smm#ifdef HAVE_ZLIB_H 324348607Smm self->read_header = gzip_read_header; 325353376Smm#endif 326228753Smm 327228753Smm state->in_stream = 0; /* We're not actually within a stream yet. */ 328228753Smm 329228753Smm return (ARCHIVE_OK); 330228753Smm} 331228753Smm 332228753Smmstatic int 333228753Smmconsume_header(struct archive_read_filter *self) 334228753Smm{ 335228753Smm struct private_data *state; 336228753Smm ssize_t avail; 337228753Smm size_t len; 338228753Smm int ret; 339228753Smm 340228753Smm state = (struct private_data *)self->data; 341228753Smm 342228753Smm /* If this is a real header, consume it. */ 343348607Smm len = peek_at_header(self->upstream, NULL, state); 344228753Smm if (len == 0) 345228753Smm return (ARCHIVE_EOF); 346228753Smm __archive_read_filter_consume(self->upstream, len); 347228753Smm 348228753Smm /* Initialize CRC accumulator. */ 349228753Smm state->crc = crc32(0L, NULL, 0); 350228753Smm 351228753Smm /* Initialize compression library. */ 352228753Smm state->stream.next_in = (unsigned char *)(uintptr_t) 353228753Smm __archive_read_filter_ahead(self->upstream, 1, &avail); 354248616Smm state->stream.avail_in = (uInt)avail; 355228753Smm ret = inflateInit2(&(state->stream), 356228753Smm -15 /* Don't check for zlib header */); 357228753Smm 358228753Smm /* Decipher the error code. */ 359228753Smm switch (ret) { 360228753Smm case Z_OK: 361228753Smm state->in_stream = 1; 362228753Smm return (ARCHIVE_OK); 363228753Smm case Z_STREAM_ERROR: 364228753Smm archive_set_error(&self->archive->archive, 365228753Smm ARCHIVE_ERRNO_MISC, 366228753Smm "Internal error initializing compression library: " 367228753Smm "invalid setup parameter"); 368228753Smm break; 369228753Smm case Z_MEM_ERROR: 370228753Smm archive_set_error(&self->archive->archive, ENOMEM, 371228753Smm "Internal error initializing compression library: " 372228753Smm "out of memory"); 373228753Smm break; 374228753Smm case Z_VERSION_ERROR: 375228753Smm archive_set_error(&self->archive->archive, 376228753Smm ARCHIVE_ERRNO_MISC, 377228753Smm "Internal error initializing compression library: " 378228753Smm "invalid library version"); 379228753Smm break; 380228753Smm default: 381228753Smm archive_set_error(&self->archive->archive, 382228753Smm ARCHIVE_ERRNO_MISC, 383228753Smm "Internal error initializing compression library: " 384228753Smm " Zlib error %d", ret); 385228753Smm break; 386228753Smm } 387228753Smm return (ARCHIVE_FATAL); 388228753Smm} 389228753Smm 390228753Smmstatic int 391228753Smmconsume_trailer(struct archive_read_filter *self) 392228753Smm{ 393228753Smm struct private_data *state; 394228753Smm const unsigned char *p; 395228753Smm ssize_t avail; 396228753Smm 397228753Smm state = (struct private_data *)self->data; 398228753Smm 399228753Smm state->in_stream = 0; 400228753Smm switch (inflateEnd(&(state->stream))) { 401228753Smm case Z_OK: 402228753Smm break; 403228753Smm default: 404228753Smm archive_set_error(&self->archive->archive, 405228753Smm ARCHIVE_ERRNO_MISC, 406228753Smm "Failed to clean up gzip decompressor"); 407228753Smm return (ARCHIVE_FATAL); 408228753Smm } 409228753Smm 410228753Smm /* GZip trailer is a fixed 8 byte structure. */ 411228753Smm p = __archive_read_filter_ahead(self->upstream, 8, &avail); 412228753Smm if (p == NULL || avail == 0) 413228753Smm return (ARCHIVE_FATAL); 414228753Smm 415228753Smm /* XXX TODO: Verify the length and CRC. */ 416228753Smm 417228753Smm /* We've verified the trailer, so consume it now. */ 418228753Smm __archive_read_filter_consume(self->upstream, 8); 419228753Smm 420228753Smm return (ARCHIVE_OK); 421228753Smm} 422228753Smm 423228753Smmstatic ssize_t 424228753Smmgzip_filter_read(struct archive_read_filter *self, const void **p) 425228753Smm{ 426228753Smm struct private_data *state; 427228753Smm size_t decompressed; 428348607Smm ssize_t avail_in, max_in; 429228753Smm int ret; 430228753Smm 431228753Smm state = (struct private_data *)self->data; 432228753Smm 433228753Smm /* Empty our output buffer. */ 434228753Smm state->stream.next_out = state->out_block; 435248616Smm state->stream.avail_out = (uInt)state->out_block_size; 436228753Smm 437228753Smm /* Try to fill the output buffer. */ 438228753Smm while (state->stream.avail_out > 0 && !state->eof) { 439228753Smm /* If we're not in a stream, read a header 440228753Smm * and initialize the decompression library. */ 441228753Smm if (!state->in_stream) { 442228753Smm ret = consume_header(self); 443228753Smm if (ret == ARCHIVE_EOF) { 444228753Smm state->eof = 1; 445228753Smm break; 446228753Smm } 447228753Smm if (ret < ARCHIVE_OK) 448228753Smm return (ret); 449228753Smm } 450228753Smm 451228753Smm /* Peek at the next available data. */ 452228753Smm /* ZLib treats stream.next_in as const but doesn't declare 453228753Smm * it so, hence this ugly cast. */ 454228753Smm state->stream.next_in = (unsigned char *)(uintptr_t) 455228753Smm __archive_read_filter_ahead(self->upstream, 1, &avail_in); 456231200Smm if (state->stream.next_in == NULL) { 457231200Smm archive_set_error(&self->archive->archive, 458231200Smm ARCHIVE_ERRNO_MISC, 459231200Smm "truncated gzip input"); 460228753Smm return (ARCHIVE_FATAL); 461231200Smm } 462348607Smm if (UINT_MAX >= SSIZE_MAX) 463348607Smm max_in = SSIZE_MAX; 464348607Smm else 465348607Smm max_in = UINT_MAX; 466348607Smm if (avail_in > max_in) 467348607Smm avail_in = max_in; 468248616Smm state->stream.avail_in = (uInt)avail_in; 469228753Smm 470228753Smm /* Decompress and consume some of that data. */ 471228753Smm ret = inflate(&(state->stream), 0); 472228753Smm switch (ret) { 473228753Smm case Z_OK: /* Decompressor made some progress. */ 474228753Smm __archive_read_filter_consume(self->upstream, 475228753Smm avail_in - state->stream.avail_in); 476228753Smm break; 477228753Smm case Z_STREAM_END: /* Found end of stream. */ 478228753Smm __archive_read_filter_consume(self->upstream, 479228753Smm avail_in - state->stream.avail_in); 480228753Smm /* Consume the stream trailer; release the 481228753Smm * decompression library. */ 482228753Smm ret = consume_trailer(self); 483228753Smm if (ret < ARCHIVE_OK) 484228753Smm return (ret); 485228753Smm break; 486228753Smm default: 487228753Smm /* Return an error. */ 488228753Smm archive_set_error(&self->archive->archive, 489228753Smm ARCHIVE_ERRNO_MISC, 490228753Smm "gzip decompression failed"); 491228753Smm return (ARCHIVE_FATAL); 492228753Smm } 493228753Smm } 494228753Smm 495228753Smm /* We've read as much as we can. */ 496228753Smm decompressed = state->stream.next_out - state->out_block; 497228753Smm state->total_out += decompressed; 498228753Smm if (decompressed == 0) 499228753Smm *p = NULL; 500228753Smm else 501228753Smm *p = state->out_block; 502228753Smm return (decompressed); 503228753Smm} 504228753Smm 505228753Smm/* 506228753Smm * Clean up the decompressor. 507228753Smm */ 508228753Smmstatic int 509228753Smmgzip_filter_close(struct archive_read_filter *self) 510228753Smm{ 511228753Smm struct private_data *state; 512228753Smm int ret; 513228753Smm 514228753Smm state = (struct private_data *)self->data; 515228753Smm ret = ARCHIVE_OK; 516228753Smm 517228753Smm if (state->in_stream) { 518228753Smm switch (inflateEnd(&(state->stream))) { 519228753Smm case Z_OK: 520228753Smm break; 521228753Smm default: 522228753Smm archive_set_error(&(self->archive->archive), 523228753Smm ARCHIVE_ERRNO_MISC, 524228753Smm "Failed to clean up gzip compressor"); 525228753Smm ret = ARCHIVE_FATAL; 526228753Smm } 527228753Smm } 528228753Smm 529348607Smm free(state->name); 530228753Smm free(state->out_block); 531228753Smm free(state); 532228753Smm return (ret); 533228753Smm} 534228753Smm 535228753Smm#endif /* HAVE_ZLIB_H */ 536