1228753Smm/*- 2228753Smm * Copyright (c) 2003-2007 Tim Kientzle 3228753Smm * All rights reserved. 4228753Smm * 5228753Smm * Redistribution and use in source and binary forms, with or without 6228753Smm * modification, are permitted provided that the following conditions 7228753Smm * are met: 8228753Smm * 1. Redistributions of source code must retain the above copyright 9228753Smm * notice, this list of conditions and the following disclaimer. 10228753Smm * 2. Redistributions in binary form must reproduce the above copyright 11228753Smm * notice, this list of conditions and the following disclaimer in the 12228753Smm * documentation and/or other materials provided with the distribution. 13228753Smm * 14228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24228753Smm */ 25228753Smm 26228753Smm/* 27228753Smm * This code borrows heavily from "compress" source code, which is 28228753Smm * protected by the following copyright. (Clause 3 dropped by request 29228753Smm * of the Regents.) 30228753Smm */ 31228753Smm 32228753Smm/*- 33228753Smm * Copyright (c) 1985, 1986, 1992, 1993 34228753Smm * The Regents of the University of California. All rights reserved. 35228753Smm * 36228753Smm * This code is derived from software contributed to Berkeley by 37228753Smm * Diomidis Spinellis and James A. Woods, derived from original 38228753Smm * work by Spencer Thomas and Joseph Orost. 39228753Smm * 40228753Smm * Redistribution and use in source and binary forms, with or without 41228753Smm * modification, are permitted provided that the following conditions 42228753Smm * are met: 43228753Smm * 1. Redistributions of source code must retain the above copyright 44228753Smm * notice, this list of conditions and the following disclaimer. 45228753Smm * 2. Redistributions in binary form must reproduce the above copyright 46228753Smm * notice, this list of conditions and the following disclaimer in the 47228753Smm * documentation and/or other materials provided with the distribution. 48228753Smm * 4. Neither the name of the University nor the names of its contributors 49228753Smm * may be used to endorse or promote products derived from this software 50228753Smm * without specific prior written permission. 51228753Smm * 52228753Smm * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53228753Smm * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54228753Smm * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55228753Smm * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56228753Smm * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57228753Smm * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58228753Smm * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59228753Smm * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60228753Smm * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61228753Smm * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62228753Smm * SUCH DAMAGE. 63228753Smm */ 64228753Smm 65228753Smm 66228753Smm#include "archive_platform.h" 67229592Smm__FBSDID("$FreeBSD$"); 68228753Smm 69228753Smm#ifdef HAVE_ERRNO_H 70228753Smm#include <errno.h> 71228753Smm#endif 72228753Smm#ifdef HAVE_STDLIB_H 73228753Smm#include <stdlib.h> 74228753Smm#endif 75228753Smm#ifdef HAVE_STRING_H 76228753Smm#include <string.h> 77228753Smm#endif 78228753Smm#ifdef HAVE_UNISTD_H 79228753Smm#include <unistd.h> 80228753Smm#endif 81228753Smm 82228753Smm#include "archive.h" 83228753Smm#include "archive_private.h" 84228753Smm#include "archive_read_private.h" 85228753Smm 86228753Smm/* 87228753Smm * Because LZW decompression is pretty simple, I've just implemented 88228753Smm * the whole decompressor here (cribbing from "compress" source code, 89228753Smm * of course), rather than relying on an external library. I have 90228753Smm * made an effort to clarify and simplify the algorithm, so the 91228753Smm * names and structure here don't exactly match those used by compress. 92228753Smm */ 93228753Smm 94228753Smmstruct private_data { 95228753Smm /* Input variables. */ 96228753Smm const unsigned char *next_in; 97228753Smm size_t avail_in; 98228753Smm int bit_buffer; 99228753Smm int bits_avail; 100228753Smm size_t bytes_in_section; 101228753Smm 102228753Smm /* Output variables. */ 103228753Smm size_t out_block_size; 104228753Smm void *out_block; 105228753Smm 106228753Smm /* Decompression status variables. */ 107228753Smm int use_reset_code; 108228753Smm int end_of_stream; /* EOF status. */ 109228753Smm int maxcode; /* Largest code. */ 110228753Smm int maxcode_bits; /* Length of largest code. */ 111228753Smm int section_end_code; /* When to increase bits. */ 112228753Smm int bits; /* Current code length. */ 113228753Smm int oldcode; /* Previous code. */ 114228753Smm int finbyte; /* Last byte of prev code. */ 115228753Smm 116228753Smm /* Dictionary. */ 117228753Smm int free_ent; /* Next dictionary entry. */ 118228753Smm unsigned char suffix[65536]; 119228753Smm uint16_t prefix[65536]; 120228753Smm 121228753Smm /* 122228753Smm * Scratch area for expanding dictionary entries. Note: 123228753Smm * "worst" case here comes from compressing /dev/zero: the 124228753Smm * last code in the dictionary will code a sequence of 125228753Smm * 65536-256 zero bytes. Thus, we need stack space to expand 126228753Smm * a 65280-byte dictionary entry. (Of course, 32640:1 127228753Smm * compression could also be considered the "best" case. ;-) 128228753Smm */ 129228753Smm unsigned char *stackp; 130228753Smm unsigned char stack[65300]; 131228753Smm}; 132228753Smm 133228753Smmstatic int compress_bidder_bid(struct archive_read_filter_bidder *, struct archive_read_filter *); 134228753Smmstatic int compress_bidder_init(struct archive_read_filter *); 135228753Smmstatic int compress_bidder_free(struct archive_read_filter_bidder *); 136228753Smm 137228753Smmstatic ssize_t compress_filter_read(struct archive_read_filter *, const void **); 138228753Smmstatic int compress_filter_close(struct archive_read_filter *); 139228753Smm 140228753Smmstatic int getbits(struct archive_read_filter *, int n); 141228753Smmstatic int next_code(struct archive_read_filter *); 142228753Smm 143228753Smmint 144228753Smmarchive_read_support_compression_compress(struct archive *_a) 145228753Smm{ 146228753Smm struct archive_read *a = (struct archive_read *)_a; 147228753Smm struct archive_read_filter_bidder *bidder = __archive_read_get_bidder(a); 148228753Smm 149228753Smm if (bidder == NULL) 150228753Smm return (ARCHIVE_FATAL); 151228753Smm 152228753Smm bidder->data = NULL; 153228753Smm bidder->bid = compress_bidder_bid; 154228753Smm bidder->init = compress_bidder_init; 155228753Smm bidder->options = NULL; 156228753Smm bidder->free = compress_bidder_free; 157228753Smm return (ARCHIVE_OK); 158228753Smm} 159228753Smm 160228753Smm/* 161228753Smm * Test whether we can handle this data. 162228753Smm * 163228753Smm * This logic returns zero if any part of the signature fails. It 164228753Smm * also tries to Do The Right Thing if a very short buffer prevents us 165228753Smm * from verifying as much as we would like. 166228753Smm */ 167228753Smmstatic int 168228753Smmcompress_bidder_bid(struct archive_read_filter_bidder *self, 169228753Smm struct archive_read_filter *filter) 170228753Smm{ 171228753Smm const unsigned char *buffer; 172228753Smm ssize_t avail; 173228753Smm int bits_checked; 174228753Smm 175228753Smm (void)self; /* UNUSED */ 176228753Smm 177228753Smm buffer = __archive_read_filter_ahead(filter, 2, &avail); 178228753Smm 179228753Smm if (buffer == NULL) 180228753Smm return (0); 181228753Smm 182228753Smm bits_checked = 0; 183228753Smm if (buffer[0] != 037) /* Verify first ID byte. */ 184228753Smm return (0); 185228753Smm bits_checked += 8; 186228753Smm 187228753Smm if (buffer[1] != 0235) /* Verify second ID byte. */ 188228753Smm return (0); 189228753Smm bits_checked += 8; 190228753Smm 191228753Smm /* 192228753Smm * TODO: Verify more. 193228753Smm */ 194228753Smm 195228753Smm return (bits_checked); 196228753Smm} 197228753Smm 198228753Smm/* 199228753Smm * Setup the callbacks. 200228753Smm */ 201228753Smmstatic int 202228753Smmcompress_bidder_init(struct archive_read_filter *self) 203228753Smm{ 204228753Smm struct private_data *state; 205228753Smm static const size_t out_block_size = 64 * 1024; 206228753Smm void *out_block; 207228753Smm int code; 208228753Smm 209228753Smm self->code = ARCHIVE_COMPRESSION_COMPRESS; 210228753Smm self->name = "compress (.Z)"; 211228753Smm 212228753Smm state = (struct private_data *)calloc(sizeof(*state), 1); 213228753Smm out_block = malloc(out_block_size); 214228753Smm if (state == NULL || out_block == NULL) { 215228753Smm free(out_block); 216228753Smm free(state); 217228753Smm archive_set_error(&self->archive->archive, ENOMEM, 218228753Smm "Can't allocate data for %s decompression", 219228753Smm self->name); 220228753Smm return (ARCHIVE_FATAL); 221228753Smm } 222228753Smm 223228753Smm self->data = state; 224228753Smm state->out_block_size = out_block_size; 225228753Smm state->out_block = out_block; 226228753Smm self->read = compress_filter_read; 227228753Smm self->skip = NULL; /* not supported */ 228228753Smm self->close = compress_filter_close; 229228753Smm 230228753Smm /* XXX MOVE THE FOLLOWING OUT OF INIT() XXX */ 231228753Smm 232228753Smm (void)getbits(self, 8); /* Skip first signature byte. */ 233228753Smm (void)getbits(self, 8); /* Skip second signature byte. */ 234228753Smm 235228753Smm code = getbits(self, 8); 236228753Smm state->maxcode_bits = code & 0x1f; 237228753Smm state->maxcode = (1 << state->maxcode_bits); 238228753Smm state->use_reset_code = code & 0x80; 239228753Smm 240228753Smm /* Initialize decompressor. */ 241228753Smm state->free_ent = 256; 242228753Smm state->stackp = state->stack; 243228753Smm if (state->use_reset_code) 244228753Smm state->free_ent++; 245228753Smm state->bits = 9; 246228753Smm state->section_end_code = (1<<state->bits) - 1; 247228753Smm state->oldcode = -1; 248228753Smm for (code = 255; code >= 0; code--) { 249228753Smm state->prefix[code] = 0; 250228753Smm state->suffix[code] = code; 251228753Smm } 252228753Smm next_code(self); 253228753Smm 254228753Smm return (ARCHIVE_OK); 255228753Smm} 256228753Smm 257228753Smm/* 258228753Smm * Return a block of data from the decompression buffer. Decompress more 259228753Smm * as necessary. 260228753Smm */ 261228753Smmstatic ssize_t 262228753Smmcompress_filter_read(struct archive_read_filter *self, const void **pblock) 263228753Smm{ 264228753Smm struct private_data *state; 265228753Smm unsigned char *p, *start, *end; 266228753Smm int ret; 267228753Smm 268228753Smm state = (struct private_data *)self->data; 269228753Smm if (state->end_of_stream) { 270228753Smm *pblock = NULL; 271228753Smm return (0); 272228753Smm } 273228753Smm p = start = (unsigned char *)state->out_block; 274228753Smm end = start + state->out_block_size; 275228753Smm 276228753Smm while (p < end && !state->end_of_stream) { 277228753Smm if (state->stackp > state->stack) { 278228753Smm *p++ = *--state->stackp; 279228753Smm } else { 280228753Smm ret = next_code(self); 281228753Smm if (ret == -1) 282228753Smm state->end_of_stream = ret; 283228753Smm else if (ret != ARCHIVE_OK) 284228753Smm return (ret); 285228753Smm } 286228753Smm } 287228753Smm 288228753Smm *pblock = start; 289228753Smm return (p - start); 290228753Smm} 291228753Smm 292228753Smm/* 293228753Smm * Clean up the reader. 294228753Smm */ 295228753Smmstatic int 296228753Smmcompress_bidder_free(struct archive_read_filter_bidder *self) 297228753Smm{ 298228753Smm self->data = NULL; 299228753Smm return (ARCHIVE_OK); 300228753Smm} 301228753Smm 302228753Smm/* 303228753Smm * Close and release the filter. 304228753Smm */ 305228753Smmstatic int 306228753Smmcompress_filter_close(struct archive_read_filter *self) 307228753Smm{ 308228753Smm struct private_data *state = (struct private_data *)self->data; 309228753Smm 310228753Smm free(state->out_block); 311228753Smm free(state); 312228753Smm return (ARCHIVE_OK); 313228753Smm} 314228753Smm 315228753Smm/* 316228753Smm * Process the next code and fill the stack with the expansion 317228753Smm * of the code. Returns ARCHIVE_FATAL if there is a fatal I/O or 318228753Smm * format error, ARCHIVE_EOF if we hit end of data, ARCHIVE_OK otherwise. 319228753Smm */ 320228753Smmstatic int 321228753Smmnext_code(struct archive_read_filter *self) 322228753Smm{ 323228753Smm struct private_data *state = (struct private_data *)self->data; 324228753Smm int code, newcode; 325228753Smm 326228753Smm static int debug_buff[1024]; 327228753Smm static unsigned debug_index; 328228753Smm 329228753Smm code = newcode = getbits(self, state->bits); 330228753Smm if (code < 0) 331228753Smm return (code); 332228753Smm 333228753Smm debug_buff[debug_index++] = code; 334228753Smm if (debug_index >= sizeof(debug_buff)/sizeof(debug_buff[0])) 335228753Smm debug_index = 0; 336228753Smm 337228753Smm /* If it's a reset code, reset the dictionary. */ 338228753Smm if ((code == 256) && state->use_reset_code) { 339228753Smm /* 340228753Smm * The original 'compress' implementation blocked its 341228753Smm * I/O in a manner that resulted in junk bytes being 342228753Smm * inserted after every reset. The next section skips 343228753Smm * this junk. (Yes, the number of *bytes* to skip is 344228753Smm * a function of the current *bit* length.) 345228753Smm */ 346228753Smm int skip_bytes = state->bits - 347228753Smm (state->bytes_in_section % state->bits); 348228753Smm skip_bytes %= state->bits; 349228753Smm state->bits_avail = 0; /* Discard rest of this byte. */ 350228753Smm while (skip_bytes-- > 0) { 351228753Smm code = getbits(self, 8); 352228753Smm if (code < 0) 353228753Smm return (code); 354228753Smm } 355228753Smm /* Now, actually do the reset. */ 356228753Smm state->bytes_in_section = 0; 357228753Smm state->bits = 9; 358228753Smm state->section_end_code = (1 << state->bits) - 1; 359228753Smm state->free_ent = 257; 360228753Smm state->oldcode = -1; 361228753Smm return (next_code(self)); 362228753Smm } 363228753Smm 364228753Smm if (code > state->free_ent) { 365228753Smm /* An invalid code is a fatal error. */ 366228753Smm archive_set_error(&(self->archive->archive), -1, 367228753Smm "Invalid compressed data"); 368228753Smm return (ARCHIVE_FATAL); 369228753Smm } 370228753Smm 371228753Smm /* Special case for KwKwK string. */ 372228753Smm if (code >= state->free_ent) { 373228753Smm *state->stackp++ = state->finbyte; 374228753Smm code = state->oldcode; 375228753Smm } 376228753Smm 377228753Smm /* Generate output characters in reverse order. */ 378228753Smm while (code >= 256) { 379228753Smm *state->stackp++ = state->suffix[code]; 380228753Smm code = state->prefix[code]; 381228753Smm } 382228753Smm *state->stackp++ = state->finbyte = code; 383228753Smm 384228753Smm /* Generate the new entry. */ 385228753Smm code = state->free_ent; 386228753Smm if (code < state->maxcode && state->oldcode >= 0) { 387228753Smm state->prefix[code] = state->oldcode; 388228753Smm state->suffix[code] = state->finbyte; 389228753Smm ++state->free_ent; 390228753Smm } 391228753Smm if (state->free_ent > state->section_end_code) { 392228753Smm state->bits++; 393228753Smm state->bytes_in_section = 0; 394228753Smm if (state->bits == state->maxcode_bits) 395228753Smm state->section_end_code = state->maxcode; 396228753Smm else 397228753Smm state->section_end_code = (1 << state->bits) - 1; 398228753Smm } 399228753Smm 400228753Smm /* Remember previous code. */ 401228753Smm state->oldcode = newcode; 402228753Smm return (ARCHIVE_OK); 403228753Smm} 404228753Smm 405228753Smm/* 406228753Smm * Return next 'n' bits from stream. 407228753Smm * 408228753Smm * -1 indicates end of available data. 409228753Smm */ 410228753Smmstatic int 411228753Smmgetbits(struct archive_read_filter *self, int n) 412228753Smm{ 413228753Smm struct private_data *state = (struct private_data *)self->data; 414228753Smm int code; 415228753Smm ssize_t ret; 416228753Smm static const int mask[] = { 417228753Smm 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff, 418228753Smm 0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff 419228753Smm }; 420228753Smm 421228753Smm while (state->bits_avail < n) { 422228753Smm if (state->avail_in <= 0) { 423228753Smm state->next_in 424228753Smm = __archive_read_filter_ahead(self->upstream, 425228753Smm 1, &ret); 426228753Smm if (ret == 0) 427228753Smm return (-1); 428228753Smm if (ret < 0 || state->next_in == NULL) 429228753Smm return (ARCHIVE_FATAL); 430228753Smm state->avail_in = ret; 431228753Smm __archive_read_filter_consume(self->upstream, ret); 432228753Smm } 433228753Smm state->bit_buffer |= *state->next_in++ << state->bits_avail; 434228753Smm state->avail_in--; 435228753Smm state->bits_avail += 8; 436228753Smm state->bytes_in_section++; 437228753Smm } 438228753Smm 439228753Smm code = state->bit_buffer; 440228753Smm state->bit_buffer >>= n; 441228753Smm state->bits_avail -= n; 442228753Smm 443228753Smm return (code & mask[n]); 444228753Smm} 445