archive_read_support_filter_gzip.c revision 348607
1/*- 2 * Copyright (c) 2003-2007 Tim Kientzle 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26#include "archive_platform.h" 27 28__FBSDID("$FreeBSD$"); 29 30 31#ifdef HAVE_ERRNO_H 32#include <errno.h> 33#endif 34#ifdef HAVE_STDLIB_H 35#include <stdlib.h> 36#endif 37#ifdef HAVE_STRING_H 38#include <string.h> 39#endif 40#ifdef HAVE_LIMITS_H 41#include <limits.h> 42#endif 43#ifdef HAVE_UNISTD_H 44#include <unistd.h> 45#endif 46#ifdef HAVE_ZLIB_H 47#include <zlib.h> 48#endif 49 50#include "archive.h" 51#include "archive_entry.h" 52#include "archive_endian.h" 53#include "archive_private.h" 54#include "archive_read_private.h" 55 56#ifdef HAVE_ZLIB_H 57struct private_data { 58 z_stream stream; 59 char in_stream; 60 unsigned char *out_block; 61 size_t out_block_size; 62 int64_t total_out; 63 unsigned long crc; 64 uint32_t mtime; 65 char *name; 66 char eof; /* True = found end of compressed data. */ 67}; 68 69/* Gzip Filter. */ 70static ssize_t gzip_filter_read(struct archive_read_filter *, const void **); 71static int gzip_filter_close(struct archive_read_filter *); 72#endif 73 74/* 75 * Note that we can detect gzip archives even if we can't decompress 76 * them. (In fact, we like detecting them because we can give better 77 * error messages.) So the bid framework here gets compiled even 78 * if zlib is unavailable. 79 * 80 * TODO: If zlib is unavailable, gzip_bidder_init() should 81 * use the compress_program framework to try to fire up an external 82 * gzip program. 83 */ 84static int gzip_bidder_bid(struct archive_read_filter_bidder *, 85 struct archive_read_filter *); 86static int gzip_bidder_init(struct archive_read_filter *); 87 88#if ARCHIVE_VERSION_NUMBER < 4000000 89/* Deprecated; remove in libarchive 4.0 */ 90int 91archive_read_support_compression_gzip(struct archive *a) 92{ 93 return archive_read_support_filter_gzip(a); 94} 95#endif 96 97int 98archive_read_support_filter_gzip(struct archive *_a) 99{ 100 struct archive_read *a = (struct archive_read *)_a; 101 struct archive_read_filter_bidder *bidder; 102 103 archive_check_magic(_a, ARCHIVE_READ_MAGIC, 104 ARCHIVE_STATE_NEW, "archive_read_support_filter_gzip"); 105 106 if (__archive_read_get_bidder(a, &bidder) != ARCHIVE_OK) 107 return (ARCHIVE_FATAL); 108 109 bidder->data = NULL; 110 bidder->name = "gzip"; 111 bidder->bid = gzip_bidder_bid; 112 bidder->init = gzip_bidder_init; 113 bidder->options = NULL; 114 bidder->free = NULL; /* No data, so no cleanup necessary. */ 115 /* Signal the extent of gzip support with the return value here. */ 116#if HAVE_ZLIB_H 117 return (ARCHIVE_OK); 118#else 119 archive_set_error(_a, ARCHIVE_ERRNO_MISC, 120 "Using external gzip program"); 121 return (ARCHIVE_WARN); 122#endif 123} 124 125/* 126 * Read and verify the header. 127 * 128 * Returns zero if the header couldn't be validated, else returns 129 * number of bytes in header. If pbits is non-NULL, it receives a 130 * count of bits verified, suitable for use by bidder. 131 */ 132static ssize_t 133peek_at_header(struct archive_read_filter *filter, int *pbits, 134 struct private_data *state) 135{ 136 const unsigned char *p; 137 ssize_t avail, len; 138 int bits = 0; 139 int header_flags; 140 141 /* Start by looking at the first ten bytes of the header, which 142 * is all fixed layout. */ 143 len = 10; 144 p = __archive_read_filter_ahead(filter, len, &avail); 145 if (p == NULL || avail == 0) 146 return (0); 147 /* We only support deflation- third byte must be 0x08. */ 148 if (memcmp(p, "\x1F\x8B\x08", 3) != 0) 149 return (0); 150 bits += 24; 151 if ((p[3] & 0xE0)!= 0) /* No reserved flags set. */ 152 return (0); 153 bits += 3; 154 header_flags = p[3]; 155 /* Bytes 4-7 are mod time in little endian. */ 156 if (state) 157 state->mtime = archive_le32dec(p + 4); 158 /* Byte 8 is deflate flags. */ 159 /* XXXX TODO: return deflate flags back to consume_header for use 160 in initializing the decompressor. */ 161 /* Byte 9 is OS. */ 162 163 /* Optional extra data: 2 byte length plus variable body. */ 164 if (header_flags & 4) { 165 p = __archive_read_filter_ahead(filter, len + 2, &avail); 166 if (p == NULL) 167 return (0); 168 len += ((int)p[len + 1] << 8) | (int)p[len]; 169 len += 2; 170 } 171 172 /* Null-terminated optional filename. */ 173 if (header_flags & 8) { 174 ssize_t file_start = len; 175 do { 176 ++len; 177 if (avail < len) 178 p = __archive_read_filter_ahead(filter, 179 len, &avail); 180 if (p == NULL) 181 return (0); 182 } while (p[len - 1] != 0); 183 184 if (state) { 185 /* Reset the name in case of repeat header reads. */ 186 free(state->name); 187 state->name = strdup((const char *)&p[file_start]); 188 } 189 } 190 191 /* Null-terminated optional comment. */ 192 if (header_flags & 16) { 193 do { 194 ++len; 195 if (avail < len) 196 p = __archive_read_filter_ahead(filter, 197 len, &avail); 198 if (p == NULL) 199 return (0); 200 } while (p[len - 1] != 0); 201 } 202 203 /* Optional header CRC */ 204 if ((header_flags & 2)) { 205 p = __archive_read_filter_ahead(filter, len + 2, &avail); 206 if (p == NULL) 207 return (0); 208#if 0 209 int hcrc = ((int)p[len + 1] << 8) | (int)p[len]; 210 int crc = /* XXX TODO: Compute header CRC. */; 211 if (crc != hcrc) 212 return (0); 213 bits += 16; 214#endif 215 len += 2; 216 } 217 218 if (pbits != NULL) 219 *pbits = bits; 220 return (len); 221} 222 223/* 224 * Bidder just verifies the header and returns the number of verified bits. 225 */ 226static int 227gzip_bidder_bid(struct archive_read_filter_bidder *self, 228 struct archive_read_filter *filter) 229{ 230 int bits_checked; 231 232 (void)self; /* UNUSED */ 233 234 if (peek_at_header(filter, &bits_checked, NULL)) 235 return (bits_checked); 236 return (0); 237} 238 239static int 240gzip_read_header(struct archive_read_filter *self, struct archive_entry *entry) 241{ 242 struct private_data *state; 243 244 state = (struct private_data *)self->data; 245 246 /* A mtime of 0 is considered invalid/missing. */ 247 if (state->mtime != 0) 248 archive_entry_set_mtime(entry, state->mtime, 0); 249 250 /* If the name is available, extract it. */ 251 if (state->name) 252 archive_entry_set_pathname(entry, state->name); 253 254 return (ARCHIVE_OK); 255} 256 257#ifndef HAVE_ZLIB_H 258 259/* 260 * If we don't have the library on this system, we can't do the 261 * decompression directly. We can, however, try to run "gzip -d" 262 * in case that's available. 263 */ 264static int 265gzip_bidder_init(struct archive_read_filter *self) 266{ 267 int r; 268 269 r = __archive_read_program(self, "gzip -d"); 270 /* Note: We set the format here even if __archive_read_program() 271 * above fails. We do, after all, know what the format is 272 * even if we weren't able to read it. */ 273 self->code = ARCHIVE_FILTER_GZIP; 274 self->name = "gzip"; 275 return (r); 276} 277 278#else 279 280/* 281 * Initialize the filter object. 282 */ 283static int 284gzip_bidder_init(struct archive_read_filter *self) 285{ 286 struct private_data *state; 287 static const size_t out_block_size = 64 * 1024; 288 void *out_block; 289 290 self->code = ARCHIVE_FILTER_GZIP; 291 self->name = "gzip"; 292 293 state = (struct private_data *)calloc(sizeof(*state), 1); 294 out_block = (unsigned char *)malloc(out_block_size); 295 if (state == NULL || out_block == NULL) { 296 free(out_block); 297 free(state); 298 archive_set_error(&self->archive->archive, ENOMEM, 299 "Can't allocate data for gzip decompression"); 300 return (ARCHIVE_FATAL); 301 } 302 303 self->data = state; 304 state->out_block_size = out_block_size; 305 state->out_block = out_block; 306 self->read = gzip_filter_read; 307 self->skip = NULL; /* not supported */ 308 self->close = gzip_filter_close; 309 self->read_header = gzip_read_header; 310 311 state->in_stream = 0; /* We're not actually within a stream yet. */ 312 313 return (ARCHIVE_OK); 314} 315 316static int 317consume_header(struct archive_read_filter *self) 318{ 319 struct private_data *state; 320 ssize_t avail; 321 size_t len; 322 int ret; 323 324 state = (struct private_data *)self->data; 325 326 /* If this is a real header, consume it. */ 327 len = peek_at_header(self->upstream, NULL, state); 328 if (len == 0) 329 return (ARCHIVE_EOF); 330 __archive_read_filter_consume(self->upstream, len); 331 332 /* Initialize CRC accumulator. */ 333 state->crc = crc32(0L, NULL, 0); 334 335 /* Initialize compression library. */ 336 state->stream.next_in = (unsigned char *)(uintptr_t) 337 __archive_read_filter_ahead(self->upstream, 1, &avail); 338 state->stream.avail_in = (uInt)avail; 339 ret = inflateInit2(&(state->stream), 340 -15 /* Don't check for zlib header */); 341 342 /* Decipher the error code. */ 343 switch (ret) { 344 case Z_OK: 345 state->in_stream = 1; 346 return (ARCHIVE_OK); 347 case Z_STREAM_ERROR: 348 archive_set_error(&self->archive->archive, 349 ARCHIVE_ERRNO_MISC, 350 "Internal error initializing compression library: " 351 "invalid setup parameter"); 352 break; 353 case Z_MEM_ERROR: 354 archive_set_error(&self->archive->archive, ENOMEM, 355 "Internal error initializing compression library: " 356 "out of memory"); 357 break; 358 case Z_VERSION_ERROR: 359 archive_set_error(&self->archive->archive, 360 ARCHIVE_ERRNO_MISC, 361 "Internal error initializing compression library: " 362 "invalid library version"); 363 break; 364 default: 365 archive_set_error(&self->archive->archive, 366 ARCHIVE_ERRNO_MISC, 367 "Internal error initializing compression library: " 368 " Zlib error %d", ret); 369 break; 370 } 371 return (ARCHIVE_FATAL); 372} 373 374static int 375consume_trailer(struct archive_read_filter *self) 376{ 377 struct private_data *state; 378 const unsigned char *p; 379 ssize_t avail; 380 381 state = (struct private_data *)self->data; 382 383 state->in_stream = 0; 384 switch (inflateEnd(&(state->stream))) { 385 case Z_OK: 386 break; 387 default: 388 archive_set_error(&self->archive->archive, 389 ARCHIVE_ERRNO_MISC, 390 "Failed to clean up gzip decompressor"); 391 return (ARCHIVE_FATAL); 392 } 393 394 /* GZip trailer is a fixed 8 byte structure. */ 395 p = __archive_read_filter_ahead(self->upstream, 8, &avail); 396 if (p == NULL || avail == 0) 397 return (ARCHIVE_FATAL); 398 399 /* XXX TODO: Verify the length and CRC. */ 400 401 /* We've verified the trailer, so consume it now. */ 402 __archive_read_filter_consume(self->upstream, 8); 403 404 return (ARCHIVE_OK); 405} 406 407static ssize_t 408gzip_filter_read(struct archive_read_filter *self, const void **p) 409{ 410 struct private_data *state; 411 size_t decompressed; 412 ssize_t avail_in, max_in; 413 int ret; 414 415 state = (struct private_data *)self->data; 416 417 /* Empty our output buffer. */ 418 state->stream.next_out = state->out_block; 419 state->stream.avail_out = (uInt)state->out_block_size; 420 421 /* Try to fill the output buffer. */ 422 while (state->stream.avail_out > 0 && !state->eof) { 423 /* If we're not in a stream, read a header 424 * and initialize the decompression library. */ 425 if (!state->in_stream) { 426 ret = consume_header(self); 427 if (ret == ARCHIVE_EOF) { 428 state->eof = 1; 429 break; 430 } 431 if (ret < ARCHIVE_OK) 432 return (ret); 433 } 434 435 /* Peek at the next available data. */ 436 /* ZLib treats stream.next_in as const but doesn't declare 437 * it so, hence this ugly cast. */ 438 state->stream.next_in = (unsigned char *)(uintptr_t) 439 __archive_read_filter_ahead(self->upstream, 1, &avail_in); 440 if (state->stream.next_in == NULL) { 441 archive_set_error(&self->archive->archive, 442 ARCHIVE_ERRNO_MISC, 443 "truncated gzip input"); 444 return (ARCHIVE_FATAL); 445 } 446 if (UINT_MAX >= SSIZE_MAX) 447 max_in = SSIZE_MAX; 448 else 449 max_in = UINT_MAX; 450 if (avail_in > max_in) 451 avail_in = max_in; 452 state->stream.avail_in = (uInt)avail_in; 453 454 /* Decompress and consume some of that data. */ 455 ret = inflate(&(state->stream), 0); 456 switch (ret) { 457 case Z_OK: /* Decompressor made some progress. */ 458 __archive_read_filter_consume(self->upstream, 459 avail_in - state->stream.avail_in); 460 break; 461 case Z_STREAM_END: /* Found end of stream. */ 462 __archive_read_filter_consume(self->upstream, 463 avail_in - state->stream.avail_in); 464 /* Consume the stream trailer; release the 465 * decompression library. */ 466 ret = consume_trailer(self); 467 if (ret < ARCHIVE_OK) 468 return (ret); 469 break; 470 default: 471 /* Return an error. */ 472 archive_set_error(&self->archive->archive, 473 ARCHIVE_ERRNO_MISC, 474 "gzip decompression failed"); 475 return (ARCHIVE_FATAL); 476 } 477 } 478 479 /* We've read as much as we can. */ 480 decompressed = state->stream.next_out - state->out_block; 481 state->total_out += decompressed; 482 if (decompressed == 0) 483 *p = NULL; 484 else 485 *p = state->out_block; 486 return (decompressed); 487} 488 489/* 490 * Clean up the decompressor. 491 */ 492static int 493gzip_filter_close(struct archive_read_filter *self) 494{ 495 struct private_data *state; 496 int ret; 497 498 state = (struct private_data *)self->data; 499 ret = ARCHIVE_OK; 500 501 if (state->in_stream) { 502 switch (inflateEnd(&(state->stream))) { 503 case Z_OK: 504 break; 505 default: 506 archive_set_error(&(self->archive->archive), 507 ARCHIVE_ERRNO_MISC, 508 "Failed to clean up gzip compressor"); 509 ret = ARCHIVE_FATAL; 510 } 511 } 512 513 free(state->name); 514 free(state->out_block); 515 free(state); 516 return (ret); 517} 518 519#endif /* HAVE_ZLIB_H */ 520