1228753Smm/*- 2228753Smm * Copyright (c) 2003-2007 Tim Kientzle 3228753Smm * All rights reserved. 4228753Smm * 5228753Smm * Redistribution and use in source and binary forms, with or without 6228753Smm * modification, are permitted provided that the following conditions 7228753Smm * are met: 8228753Smm * 1. Redistributions of source code must retain the above copyright 9228753Smm * notice, this list of conditions and the following disclaimer. 10228753Smm * 2. Redistributions in binary form must reproduce the above copyright 11228753Smm * notice, this list of conditions and the following disclaimer in the 12228753Smm * documentation and/or other materials provided with the distribution. 13228753Smm * 14228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24228753Smm */ 25228753Smm 26228753Smm#include "archive_platform.h" 27228753Smm 28229592Smm__FBSDID("$FreeBSD$"); 29228753Smm 30228753Smm#ifdef HAVE_ERRNO_H 31228753Smm#include <errno.h> 32228753Smm#endif 33228753Smm#include <stdio.h> 34228753Smm#ifdef HAVE_STDLIB_H 35228753Smm#include <stdlib.h> 36228753Smm#endif 37228753Smm#ifdef HAVE_STRING_H 38228753Smm#include <string.h> 39228753Smm#endif 40228753Smm#ifdef HAVE_UNISTD_H 41228753Smm#include <unistd.h> 42228753Smm#endif 43228753Smm#ifdef HAVE_BZLIB_H 44228753Smm#include <bzlib.h> 45228753Smm#endif 46228753Smm 47228753Smm#include "archive.h" 48228753Smm#include "archive_private.h" 49228753Smm#include "archive_read_private.h" 50228753Smm 51228753Smm#if defined(HAVE_BZLIB_H) && defined(BZ_CONFIG_ERROR) 52228753Smmstruct private_data { 53228753Smm bz_stream stream; 54228753Smm char *out_block; 55228753Smm size_t out_block_size; 56228753Smm char valid; /* True = decompressor is initialized */ 57228753Smm char eof; /* True = found end of compressed data. */ 58228753Smm}; 59228753Smm 60228753Smm/* Bzip2 filter */ 61228753Smmstatic ssize_t bzip2_filter_read(struct archive_read_filter *, const void **); 62228753Smmstatic int bzip2_filter_close(struct archive_read_filter *); 63228753Smm#endif 64228753Smm 65228753Smm/* 66228753Smm * Note that we can detect bzip2 archives even if we can't decompress 67228753Smm * them. (In fact, we like detecting them because we can give better 68228753Smm * error messages.) So the bid framework here gets compiled even 69228753Smm * if bzlib is unavailable. 70228753Smm */ 71228753Smmstatic int bzip2_reader_bid(struct archive_read_filter_bidder *, struct archive_read_filter *); 72228753Smmstatic int bzip2_reader_init(struct archive_read_filter *); 73228753Smmstatic int bzip2_reader_free(struct archive_read_filter_bidder *); 74228753Smm 75228753Smmint 76228753Smmarchive_read_support_compression_bzip2(struct archive *_a) 77228753Smm{ 78228753Smm struct archive_read *a = (struct archive_read *)_a; 79228753Smm struct archive_read_filter_bidder *reader = __archive_read_get_bidder(a); 80228753Smm 81228753Smm if (reader == NULL) 82228753Smm return (ARCHIVE_FATAL); 83228753Smm 84228753Smm reader->data = NULL; 85228753Smm reader->bid = bzip2_reader_bid; 86228753Smm reader->init = bzip2_reader_init; 87228753Smm reader->options = NULL; 88228753Smm reader->free = bzip2_reader_free; 89228753Smm#if defined(HAVE_BZLIB_H) && defined(BZ_CONFIG_ERROR) 90228753Smm return (ARCHIVE_OK); 91228753Smm#else 92228753Smm archive_set_error(_a, ARCHIVE_ERRNO_MISC, 93228753Smm "Using external bunzip2 program"); 94228753Smm return (ARCHIVE_WARN); 95228753Smm#endif 96228753Smm} 97228753Smm 98228753Smmstatic int 99228753Smmbzip2_reader_free(struct archive_read_filter_bidder *self){ 100228753Smm (void)self; /* UNUSED */ 101228753Smm return (ARCHIVE_OK); 102228753Smm} 103228753Smm 104228753Smm/* 105228753Smm * Test whether we can handle this data. 106228753Smm * 107228753Smm * This logic returns zero if any part of the signature fails. It 108228753Smm * also tries to Do The Right Thing if a very short buffer prevents us 109228753Smm * from verifying as much as we would like. 110228753Smm */ 111228753Smmstatic int 112228753Smmbzip2_reader_bid(struct archive_read_filter_bidder *self, struct archive_read_filter *filter) 113228753Smm{ 114228753Smm const unsigned char *buffer; 115228753Smm ssize_t avail; 116228753Smm int bits_checked; 117228753Smm 118228753Smm (void)self; /* UNUSED */ 119228753Smm 120228753Smm /* Minimal bzip2 archive is 14 bytes. */ 121228753Smm buffer = __archive_read_filter_ahead(filter, 14, &avail); 122228753Smm if (buffer == NULL) 123228753Smm return (0); 124228753Smm 125228753Smm /* First three bytes must be "BZh" */ 126228753Smm bits_checked = 0; 127228753Smm if (buffer[0] != 'B' || buffer[1] != 'Z' || buffer[2] != 'h') 128228753Smm return (0); 129228753Smm bits_checked += 24; 130228753Smm 131228753Smm /* Next follows a compression flag which must be an ASCII digit. */ 132228753Smm if (buffer[3] < '1' || buffer[3] > '9') 133228753Smm return (0); 134228753Smm bits_checked += 5; 135228753Smm 136228753Smm /* After BZh[1-9], there must be either a data block 137228753Smm * which begins with 0x314159265359 or an end-of-data 138228753Smm * marker of 0x177245385090. */ 139228753Smm if (memcmp(buffer + 4, "\x31\x41\x59\x26\x53\x59", 6) == 0) 140228753Smm bits_checked += 48; 141228753Smm else if (memcmp(buffer + 4, "\x17\x72\x45\x38\x50\x90", 6) == 0) 142228753Smm bits_checked += 48; 143228753Smm else 144228753Smm return (0); 145228753Smm 146228753Smm return (bits_checked); 147228753Smm} 148228753Smm 149228753Smm#if !defined(HAVE_BZLIB_H) || !defined(BZ_CONFIG_ERROR) 150228753Smm 151228753Smm/* 152228753Smm * If we don't have the library on this system, we can't actually do the 153228753Smm * decompression. We can, however, still detect compressed archives 154228753Smm * and emit a useful message. 155228753Smm */ 156228753Smmstatic int 157228753Smmbzip2_reader_init(struct archive_read_filter *self) 158228753Smm{ 159228753Smm int r; 160228753Smm 161228753Smm r = __archive_read_program(self, "bunzip2"); 162228753Smm /* Note: We set the format here even if __archive_read_program() 163228753Smm * above fails. We do, after all, know what the format is 164228753Smm * even if we weren't able to read it. */ 165228753Smm self->code = ARCHIVE_COMPRESSION_BZIP2; 166228753Smm self->name = "bzip2"; 167228753Smm return (r); 168228753Smm} 169228753Smm 170228753Smm 171228753Smm#else 172228753Smm 173228753Smm/* 174228753Smm * Setup the callbacks. 175228753Smm */ 176228753Smmstatic int 177228753Smmbzip2_reader_init(struct archive_read_filter *self) 178228753Smm{ 179228753Smm static const size_t out_block_size = 64 * 1024; 180228753Smm void *out_block; 181228753Smm struct private_data *state; 182228753Smm 183228753Smm self->code = ARCHIVE_COMPRESSION_BZIP2; 184228753Smm self->name = "bzip2"; 185228753Smm 186228753Smm state = (struct private_data *)calloc(sizeof(*state), 1); 187228753Smm out_block = (unsigned char *)malloc(out_block_size); 188228753Smm if (self == NULL || state == NULL || out_block == NULL) { 189228753Smm archive_set_error(&self->archive->archive, ENOMEM, 190228753Smm "Can't allocate data for bzip2 decompression"); 191228753Smm free(out_block); 192228753Smm free(state); 193228753Smm return (ARCHIVE_FATAL); 194228753Smm } 195228753Smm 196228753Smm self->data = state; 197228753Smm state->out_block_size = out_block_size; 198228753Smm state->out_block = out_block; 199228753Smm self->read = bzip2_filter_read; 200228753Smm self->skip = NULL; /* not supported */ 201228753Smm self->close = bzip2_filter_close; 202228753Smm 203228753Smm return (ARCHIVE_OK); 204228753Smm} 205228753Smm 206228753Smm/* 207228753Smm * Return the next block of decompressed data. 208228753Smm */ 209228753Smmstatic ssize_t 210228753Smmbzip2_filter_read(struct archive_read_filter *self, const void **p) 211228753Smm{ 212228753Smm struct private_data *state; 213228753Smm size_t decompressed; 214228753Smm const char *read_buf; 215228753Smm ssize_t ret; 216228753Smm 217228753Smm state = (struct private_data *)self->data; 218228753Smm 219228753Smm if (state->eof) { 220228753Smm *p = NULL; 221228753Smm return (0); 222228753Smm } 223228753Smm 224228753Smm /* Empty our output buffer. */ 225228753Smm state->stream.next_out = state->out_block; 226228753Smm state->stream.avail_out = state->out_block_size; 227228753Smm 228228753Smm /* Try to fill the output buffer. */ 229228753Smm for (;;) { 230228753Smm if (!state->valid) { 231228753Smm if (bzip2_reader_bid(self->bidder, self->upstream) == 0) { 232228753Smm state->eof = 1; 233228753Smm *p = state->out_block; 234228753Smm decompressed = state->stream.next_out 235228753Smm - state->out_block; 236228753Smm return (decompressed); 237228753Smm } 238228753Smm /* Initialize compression library. */ 239228753Smm ret = BZ2_bzDecompressInit(&(state->stream), 240228753Smm 0 /* library verbosity */, 241228753Smm 0 /* don't use low-mem algorithm */); 242228753Smm 243228753Smm /* If init fails, try low-memory algorithm instead. */ 244228753Smm if (ret == BZ_MEM_ERROR) 245228753Smm ret = BZ2_bzDecompressInit(&(state->stream), 246228753Smm 0 /* library verbosity */, 247228753Smm 1 /* do use low-mem algo */); 248228753Smm 249228753Smm if (ret != BZ_OK) { 250228753Smm const char *detail = NULL; 251228753Smm int err = ARCHIVE_ERRNO_MISC; 252228753Smm switch (ret) { 253228753Smm case BZ_PARAM_ERROR: 254228753Smm detail = "invalid setup parameter"; 255228753Smm break; 256228753Smm case BZ_MEM_ERROR: 257228753Smm err = ENOMEM; 258228753Smm detail = "out of memory"; 259228753Smm break; 260228753Smm case BZ_CONFIG_ERROR: 261228753Smm detail = "mis-compiled library"; 262228753Smm break; 263228753Smm } 264228753Smm archive_set_error(&self->archive->archive, err, 265228753Smm "Internal error initializing decompressor%s%s", 266228753Smm detail == NULL ? "" : ": ", 267228753Smm detail); 268228753Smm return (ARCHIVE_FATAL); 269228753Smm } 270228753Smm state->valid = 1; 271228753Smm } 272228753Smm 273228753Smm /* stream.next_in is really const, but bzlib 274228753Smm * doesn't declare it so. <sigh> */ 275228753Smm read_buf = 276228753Smm __archive_read_filter_ahead(self->upstream, 1, &ret); 277228753Smm if (read_buf == NULL) 278228753Smm return (ARCHIVE_FATAL); 279228753Smm state->stream.next_in = (char *)(uintptr_t)read_buf; 280228753Smm state->stream.avail_in = ret; 281228753Smm /* There is no more data, return whatever we have. */ 282228753Smm if (ret == 0) { 283228753Smm state->eof = 1; 284228753Smm *p = state->out_block; 285228753Smm decompressed = state->stream.next_out 286228753Smm - state->out_block; 287228753Smm return (decompressed); 288228753Smm } 289228753Smm 290228753Smm /* Decompress as much as we can in one pass. */ 291228753Smm ret = BZ2_bzDecompress(&(state->stream)); 292228753Smm __archive_read_filter_consume(self->upstream, 293228753Smm state->stream.next_in - read_buf); 294228753Smm 295228753Smm switch (ret) { 296228753Smm case BZ_STREAM_END: /* Found end of stream. */ 297228753Smm switch (BZ2_bzDecompressEnd(&(state->stream))) { 298228753Smm case BZ_OK: 299228753Smm break; 300228753Smm default: 301228753Smm archive_set_error(&(self->archive->archive), 302228753Smm ARCHIVE_ERRNO_MISC, 303228753Smm "Failed to clean up decompressor"); 304228753Smm return (ARCHIVE_FATAL); 305228753Smm } 306228753Smm state->valid = 0; 307228753Smm /* FALLTHROUGH */ 308228753Smm case BZ_OK: /* Decompressor made some progress. */ 309228753Smm /* If we filled our buffer, update stats and return. */ 310228753Smm if (state->stream.avail_out == 0) { 311228753Smm *p = state->out_block; 312228753Smm decompressed = state->stream.next_out 313228753Smm - state->out_block; 314228753Smm return (decompressed); 315228753Smm } 316228753Smm break; 317228753Smm default: /* Return an error. */ 318228753Smm archive_set_error(&self->archive->archive, 319228753Smm ARCHIVE_ERRNO_MISC, "bzip decompression failed"); 320228753Smm return (ARCHIVE_FATAL); 321228753Smm } 322228753Smm } 323228753Smm} 324228753Smm 325228753Smm/* 326228753Smm * Clean up the decompressor. 327228753Smm */ 328228753Smmstatic int 329228753Smmbzip2_filter_close(struct archive_read_filter *self) 330228753Smm{ 331228753Smm struct private_data *state; 332228753Smm int ret = ARCHIVE_OK; 333228753Smm 334228753Smm state = (struct private_data *)self->data; 335228753Smm 336228753Smm if (state->valid) { 337228753Smm switch (BZ2_bzDecompressEnd(&state->stream)) { 338228753Smm case BZ_OK: 339228753Smm break; 340228753Smm default: 341228753Smm archive_set_error(&self->archive->archive, 342228753Smm ARCHIVE_ERRNO_MISC, 343228753Smm "Failed to clean up decompressor"); 344228753Smm ret = ARCHIVE_FATAL; 345228753Smm } 346228753Smm } 347228753Smm 348228753Smm free(state->out_block); 349228753Smm free(state); 350228753Smm return (ret); 351228753Smm} 352228753Smm 353228753Smm#endif /* HAVE_BZLIB_H && BZ_CONFIG_ERROR */ 354