1228753Smm/*- 2228753Smm * Copyright (c) 2003-2007 Tim Kientzle 3228753Smm * All rights reserved. 4228753Smm * 5228753Smm * Redistribution and use in source and binary forms, with or without 6228753Smm * modification, are permitted provided that the following conditions 7228753Smm * are met: 8228753Smm * 1. Redistributions of source code must retain the above copyright 9228753Smm * notice, this list of conditions and the following disclaimer. 10228753Smm * 2. Redistributions in binary form must reproduce the above copyright 11228753Smm * notice, this list of conditions and the following disclaimer in the 12228753Smm * documentation and/or other materials provided with the distribution. 13228753Smm * 14228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24228753Smm */ 25228753Smm 26228753Smm/* 27228753Smm * This code borrows heavily from "compress" source code, which is 28228753Smm * protected by the following copyright. (Clause 3 dropped by request 29228753Smm * of the Regents.) 30228753Smm */ 31228753Smm 32228753Smm/*- 33228753Smm * Copyright (c) 1985, 1986, 1992, 1993 34228753Smm * The Regents of the University of California. All rights reserved. 35228753Smm * 36228753Smm * This code is derived from software contributed to Berkeley by 37228753Smm * Diomidis Spinellis and James A. Woods, derived from original 38228753Smm * work by Spencer Thomas and Joseph Orost. 39228753Smm * 40228753Smm * Redistribution and use in source and binary forms, with or without 41228753Smm * modification, are permitted provided that the following conditions 42228753Smm * are met: 43228753Smm * 1. Redistributions of source code must retain the above copyright 44228753Smm * notice, this list of conditions and the following disclaimer. 45228753Smm * 2. Redistributions in binary form must reproduce the above copyright 46228753Smm * notice, this list of conditions and the following disclaimer in the 47228753Smm * documentation and/or other materials provided with the distribution. 48228753Smm * 4. Neither the name of the University nor the names of its contributors 49228753Smm * may be used to endorse or promote products derived from this software 50228753Smm * without specific prior written permission. 51228753Smm * 52228753Smm * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53228753Smm * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54228753Smm * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55228753Smm * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56228753Smm * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57228753Smm * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58228753Smm * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59228753Smm * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60228753Smm * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61228753Smm * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62228753Smm * SUCH DAMAGE. 63228753Smm */ 64228753Smm 65228753Smm 66228753Smm#include "archive_platform.h" 67231200Smm__FBSDID("$FreeBSD$"); 68228753Smm 69228753Smm#ifdef HAVE_ERRNO_H 70228753Smm#include <errno.h> 71228753Smm#endif 72228753Smm#ifdef HAVE_STDLIB_H 73228753Smm#include <stdlib.h> 74228753Smm#endif 75228753Smm#ifdef HAVE_STRING_H 76228753Smm#include <string.h> 77228753Smm#endif 78228753Smm#ifdef HAVE_UNISTD_H 79228753Smm#include <unistd.h> 80228753Smm#endif 81228753Smm 82228753Smm#include "archive.h" 83228753Smm#include "archive_private.h" 84228753Smm#include "archive_read_private.h" 85228753Smm 86228753Smm/* 87228753Smm * Because LZW decompression is pretty simple, I've just implemented 88228753Smm * the whole decompressor here (cribbing from "compress" source code, 89228753Smm * of course), rather than relying on an external library. I have 90228753Smm * made an effort to clarify and simplify the algorithm, so the 91228753Smm * names and structure here don't exactly match those used by compress. 92228753Smm */ 93228753Smm 94228753Smmstruct private_data { 95228753Smm /* Input variables. */ 96228753Smm const unsigned char *next_in; 97228753Smm size_t avail_in; 98231200Smm size_t consume_unnotified; 99228753Smm int bit_buffer; 100228753Smm int bits_avail; 101228753Smm size_t bytes_in_section; 102228753Smm 103228753Smm /* Output variables. */ 104228753Smm size_t out_block_size; 105228753Smm void *out_block; 106228753Smm 107228753Smm /* Decompression status variables. */ 108228753Smm int use_reset_code; 109228753Smm int end_of_stream; /* EOF status. */ 110228753Smm int maxcode; /* Largest code. */ 111228753Smm int maxcode_bits; /* Length of largest code. */ 112228753Smm int section_end_code; /* When to increase bits. */ 113228753Smm int bits; /* Current code length. */ 114228753Smm int oldcode; /* Previous code. */ 115228753Smm int finbyte; /* Last byte of prev code. */ 116228753Smm 117228753Smm /* Dictionary. */ 118228753Smm int free_ent; /* Next dictionary entry. */ 119228753Smm unsigned char suffix[65536]; 120228753Smm uint16_t prefix[65536]; 121228753Smm 122228753Smm /* 123228753Smm * Scratch area for expanding dictionary entries. Note: 124228753Smm * "worst" case here comes from compressing /dev/zero: the 125228753Smm * last code in the dictionary will code a sequence of 126228753Smm * 65536-256 zero bytes. Thus, we need stack space to expand 127228753Smm * a 65280-byte dictionary entry. (Of course, 32640:1 128228753Smm * compression could also be considered the "best" case. ;-) 129228753Smm */ 130228753Smm unsigned char *stackp; 131228753Smm unsigned char stack[65300]; 132228753Smm}; 133228753Smm 134228753Smmstatic int compress_bidder_bid(struct archive_read_filter_bidder *, struct archive_read_filter *); 135228753Smmstatic int compress_bidder_init(struct archive_read_filter *); 136228753Smmstatic int compress_bidder_free(struct archive_read_filter_bidder *); 137228753Smm 138228753Smmstatic ssize_t compress_filter_read(struct archive_read_filter *, const void **); 139228753Smmstatic int compress_filter_close(struct archive_read_filter *); 140228753Smm 141228753Smmstatic int getbits(struct archive_read_filter *, int n); 142228753Smmstatic int next_code(struct archive_read_filter *); 143228753Smm 144231200Smm#if ARCHIVE_VERSION_NUMBER < 4000000 145231200Smm/* Deprecated; remove in libarchive 4.0 */ 146228753Smmint 147231200Smmarchive_read_support_compression_compress(struct archive *a) 148228753Smm{ 149231200Smm return archive_read_support_filter_compress(a); 150231200Smm} 151231200Smm#endif 152231200Smm 153231200Smmint 154231200Smmarchive_read_support_filter_compress(struct archive *_a) 155231200Smm{ 156228753Smm struct archive_read *a = (struct archive_read *)_a; 157231200Smm struct archive_read_filter_bidder *bidder; 158228753Smm 159231200Smm archive_check_magic(_a, ARCHIVE_READ_MAGIC, 160231200Smm ARCHIVE_STATE_NEW, "archive_read_support_filter_compress"); 161231200Smm 162231200Smm if (__archive_read_get_bidder(a, &bidder) != ARCHIVE_OK) 163228753Smm return (ARCHIVE_FATAL); 164228753Smm 165228753Smm bidder->data = NULL; 166248616Smm bidder->name = "compress (.Z)"; 167228753Smm bidder->bid = compress_bidder_bid; 168228753Smm bidder->init = compress_bidder_init; 169228753Smm bidder->options = NULL; 170228753Smm bidder->free = compress_bidder_free; 171228753Smm return (ARCHIVE_OK); 172228753Smm} 173228753Smm 174228753Smm/* 175228753Smm * Test whether we can handle this data. 176231200Smm * This logic returns zero if any part of the signature fails. 177228753Smm */ 178228753Smmstatic int 179228753Smmcompress_bidder_bid(struct archive_read_filter_bidder *self, 180228753Smm struct archive_read_filter *filter) 181228753Smm{ 182228753Smm const unsigned char *buffer; 183228753Smm ssize_t avail; 184228753Smm int bits_checked; 185228753Smm 186228753Smm (void)self; /* UNUSED */ 187228753Smm 188299529Smm /* Shortest valid compress file is 3 bytes. */ 189299529Smm buffer = __archive_read_filter_ahead(filter, 3, &avail); 190228753Smm 191228753Smm if (buffer == NULL) 192228753Smm return (0); 193228753Smm 194228753Smm bits_checked = 0; 195299529Smm /* First two bytes are the magic value */ 196231200Smm if (buffer[0] != 0x1F || buffer[1] != 0x9D) 197228753Smm return (0); 198299529Smm /* Third byte holds compression parameters. */ 199299529Smm if (buffer[2] & 0x20) /* Reserved bit, must be zero. */ 200299529Smm return (0); 201299529Smm if (buffer[2] & 0x40) /* Reserved bit, must be zero. */ 202299529Smm return (0); 203299529Smm bits_checked += 18; 204228753Smm 205228753Smm return (bits_checked); 206228753Smm} 207228753Smm 208228753Smm/* 209228753Smm * Setup the callbacks. 210228753Smm */ 211228753Smmstatic int 212228753Smmcompress_bidder_init(struct archive_read_filter *self) 213228753Smm{ 214228753Smm struct private_data *state; 215228753Smm static const size_t out_block_size = 64 * 1024; 216228753Smm void *out_block; 217228753Smm int code; 218228753Smm 219248616Smm self->code = ARCHIVE_FILTER_COMPRESS; 220228753Smm self->name = "compress (.Z)"; 221228753Smm 222228753Smm state = (struct private_data *)calloc(sizeof(*state), 1); 223228753Smm out_block = malloc(out_block_size); 224228753Smm if (state == NULL || out_block == NULL) { 225228753Smm free(out_block); 226228753Smm free(state); 227228753Smm archive_set_error(&self->archive->archive, ENOMEM, 228228753Smm "Can't allocate data for %s decompression", 229228753Smm self->name); 230228753Smm return (ARCHIVE_FATAL); 231228753Smm } 232228753Smm 233228753Smm self->data = state; 234228753Smm state->out_block_size = out_block_size; 235228753Smm state->out_block = out_block; 236228753Smm self->read = compress_filter_read; 237228753Smm self->skip = NULL; /* not supported */ 238228753Smm self->close = compress_filter_close; 239228753Smm 240228753Smm /* XXX MOVE THE FOLLOWING OUT OF INIT() XXX */ 241228753Smm 242228753Smm (void)getbits(self, 8); /* Skip first signature byte. */ 243228753Smm (void)getbits(self, 8); /* Skip second signature byte. */ 244228753Smm 245299529Smm /* Get compression parameters. */ 246228753Smm code = getbits(self, 8); 247299529Smm if ((code & 0x1f) > 16) { 248299529Smm archive_set_error(&self->archive->archive, -1, 249299529Smm "Invalid compressed data"); 250299529Smm return (ARCHIVE_FATAL); 251299529Smm } 252228753Smm state->maxcode_bits = code & 0x1f; 253228753Smm state->maxcode = (1 << state->maxcode_bits); 254228753Smm state->use_reset_code = code & 0x80; 255228753Smm 256228753Smm /* Initialize decompressor. */ 257228753Smm state->free_ent = 256; 258228753Smm state->stackp = state->stack; 259228753Smm if (state->use_reset_code) 260228753Smm state->free_ent++; 261228753Smm state->bits = 9; 262228753Smm state->section_end_code = (1<<state->bits) - 1; 263228753Smm state->oldcode = -1; 264228753Smm for (code = 255; code >= 0; code--) { 265228753Smm state->prefix[code] = 0; 266228753Smm state->suffix[code] = code; 267228753Smm } 268228753Smm next_code(self); 269228753Smm 270228753Smm return (ARCHIVE_OK); 271228753Smm} 272228753Smm 273228753Smm/* 274228753Smm * Return a block of data from the decompression buffer. Decompress more 275228753Smm * as necessary. 276228753Smm */ 277228753Smmstatic ssize_t 278228753Smmcompress_filter_read(struct archive_read_filter *self, const void **pblock) 279228753Smm{ 280228753Smm struct private_data *state; 281228753Smm unsigned char *p, *start, *end; 282228753Smm int ret; 283228753Smm 284228753Smm state = (struct private_data *)self->data; 285228753Smm if (state->end_of_stream) { 286228753Smm *pblock = NULL; 287228753Smm return (0); 288228753Smm } 289228753Smm p = start = (unsigned char *)state->out_block; 290228753Smm end = start + state->out_block_size; 291228753Smm 292228753Smm while (p < end && !state->end_of_stream) { 293228753Smm if (state->stackp > state->stack) { 294228753Smm *p++ = *--state->stackp; 295228753Smm } else { 296228753Smm ret = next_code(self); 297228753Smm if (ret == -1) 298228753Smm state->end_of_stream = ret; 299228753Smm else if (ret != ARCHIVE_OK) 300228753Smm return (ret); 301228753Smm } 302228753Smm } 303228753Smm 304228753Smm *pblock = start; 305228753Smm return (p - start); 306228753Smm} 307228753Smm 308228753Smm/* 309228753Smm * Clean up the reader. 310228753Smm */ 311228753Smmstatic int 312228753Smmcompress_bidder_free(struct archive_read_filter_bidder *self) 313228753Smm{ 314228753Smm self->data = NULL; 315228753Smm return (ARCHIVE_OK); 316228753Smm} 317228753Smm 318228753Smm/* 319228753Smm * Close and release the filter. 320228753Smm */ 321228753Smmstatic int 322228753Smmcompress_filter_close(struct archive_read_filter *self) 323228753Smm{ 324228753Smm struct private_data *state = (struct private_data *)self->data; 325228753Smm 326228753Smm free(state->out_block); 327228753Smm free(state); 328228753Smm return (ARCHIVE_OK); 329228753Smm} 330228753Smm 331228753Smm/* 332228753Smm * Process the next code and fill the stack with the expansion 333228753Smm * of the code. Returns ARCHIVE_FATAL if there is a fatal I/O or 334228753Smm * format error, ARCHIVE_EOF if we hit end of data, ARCHIVE_OK otherwise. 335228753Smm */ 336228753Smmstatic int 337228753Smmnext_code(struct archive_read_filter *self) 338228753Smm{ 339228753Smm struct private_data *state = (struct private_data *)self->data; 340228753Smm int code, newcode; 341228753Smm 342228753Smm static int debug_buff[1024]; 343228753Smm static unsigned debug_index; 344228753Smm 345228753Smm code = newcode = getbits(self, state->bits); 346228753Smm if (code < 0) 347228753Smm return (code); 348228753Smm 349228753Smm debug_buff[debug_index++] = code; 350228753Smm if (debug_index >= sizeof(debug_buff)/sizeof(debug_buff[0])) 351228753Smm debug_index = 0; 352228753Smm 353228753Smm /* If it's a reset code, reset the dictionary. */ 354228753Smm if ((code == 256) && state->use_reset_code) { 355228753Smm /* 356228753Smm * The original 'compress' implementation blocked its 357228753Smm * I/O in a manner that resulted in junk bytes being 358228753Smm * inserted after every reset. The next section skips 359228753Smm * this junk. (Yes, the number of *bytes* to skip is 360228753Smm * a function of the current *bit* length.) 361228753Smm */ 362228753Smm int skip_bytes = state->bits - 363228753Smm (state->bytes_in_section % state->bits); 364228753Smm skip_bytes %= state->bits; 365228753Smm state->bits_avail = 0; /* Discard rest of this byte. */ 366228753Smm while (skip_bytes-- > 0) { 367228753Smm code = getbits(self, 8); 368228753Smm if (code < 0) 369228753Smm return (code); 370228753Smm } 371228753Smm /* Now, actually do the reset. */ 372228753Smm state->bytes_in_section = 0; 373228753Smm state->bits = 9; 374228753Smm state->section_end_code = (1 << state->bits) - 1; 375228753Smm state->free_ent = 257; 376228753Smm state->oldcode = -1; 377228753Smm return (next_code(self)); 378228753Smm } 379228753Smm 380299529Smm if (code > state->free_ent 381299529Smm || (code == state->free_ent && state->oldcode < 0)) { 382228753Smm /* An invalid code is a fatal error. */ 383228753Smm archive_set_error(&(self->archive->archive), -1, 384228753Smm "Invalid compressed data"); 385228753Smm return (ARCHIVE_FATAL); 386228753Smm } 387228753Smm 388228753Smm /* Special case for KwKwK string. */ 389228753Smm if (code >= state->free_ent) { 390228753Smm *state->stackp++ = state->finbyte; 391228753Smm code = state->oldcode; 392228753Smm } 393228753Smm 394228753Smm /* Generate output characters in reverse order. */ 395228753Smm while (code >= 256) { 396228753Smm *state->stackp++ = state->suffix[code]; 397228753Smm code = state->prefix[code]; 398228753Smm } 399228753Smm *state->stackp++ = state->finbyte = code; 400228753Smm 401228753Smm /* Generate the new entry. */ 402228753Smm code = state->free_ent; 403228753Smm if (code < state->maxcode && state->oldcode >= 0) { 404228753Smm state->prefix[code] = state->oldcode; 405228753Smm state->suffix[code] = state->finbyte; 406228753Smm ++state->free_ent; 407228753Smm } 408228753Smm if (state->free_ent > state->section_end_code) { 409228753Smm state->bits++; 410228753Smm state->bytes_in_section = 0; 411228753Smm if (state->bits == state->maxcode_bits) 412228753Smm state->section_end_code = state->maxcode; 413228753Smm else 414228753Smm state->section_end_code = (1 << state->bits) - 1; 415228753Smm } 416228753Smm 417228753Smm /* Remember previous code. */ 418228753Smm state->oldcode = newcode; 419228753Smm return (ARCHIVE_OK); 420228753Smm} 421228753Smm 422228753Smm/* 423228753Smm * Return next 'n' bits from stream. 424228753Smm * 425228753Smm * -1 indicates end of available data. 426228753Smm */ 427228753Smmstatic int 428228753Smmgetbits(struct archive_read_filter *self, int n) 429228753Smm{ 430228753Smm struct private_data *state = (struct private_data *)self->data; 431228753Smm int code; 432228753Smm ssize_t ret; 433228753Smm static const int mask[] = { 434228753Smm 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff, 435228753Smm 0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff 436228753Smm }; 437228753Smm 438228753Smm while (state->bits_avail < n) { 439228753Smm if (state->avail_in <= 0) { 440231200Smm if (state->consume_unnotified) { 441231200Smm __archive_read_filter_consume(self->upstream, 442231200Smm state->consume_unnotified); 443231200Smm state->consume_unnotified = 0; 444231200Smm } 445228753Smm state->next_in 446228753Smm = __archive_read_filter_ahead(self->upstream, 447228753Smm 1, &ret); 448228753Smm if (ret == 0) 449228753Smm return (-1); 450228753Smm if (ret < 0 || state->next_in == NULL) 451228753Smm return (ARCHIVE_FATAL); 452231200Smm state->consume_unnotified = state->avail_in = ret; 453228753Smm } 454228753Smm state->bit_buffer |= *state->next_in++ << state->bits_avail; 455228753Smm state->avail_in--; 456228753Smm state->bits_avail += 8; 457228753Smm state->bytes_in_section++; 458228753Smm } 459228753Smm 460228753Smm code = state->bit_buffer; 461228753Smm state->bit_buffer >>= n; 462228753Smm state->bits_avail -= n; 463228753Smm 464228753Smm return (code & mask[n]); 465228753Smm} 466