1228753Smm/*- 2228753Smm * Copyright (c) 2008 Joerg Sonnenberger 3228753Smm * All rights reserved. 4228753Smm * 5228753Smm * Redistribution and use in source and binary forms, with or without 6228753Smm * modification, are permitted provided that the following conditions 7228753Smm * are met: 8228753Smm * 1. Redistributions of source code must retain the above copyright 9228753Smm * notice, this list of conditions and the following disclaimer. 10228753Smm * 2. Redistributions in binary form must reproduce the above copyright 11228753Smm * notice, this list of conditions and the following disclaimer in the 12228753Smm * documentation and/or other materials provided with the distribution. 13228753Smm * 14228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24228753Smm */ 25228753Smm 26228753Smm/*- 27228753Smm * Copyright (c) 1985, 1986, 1992, 1993 28228753Smm * The Regents of the University of California. All rights reserved. 29228753Smm * 30228753Smm * This code is derived from software contributed to Berkeley by 31228753Smm * Diomidis Spinellis and James A. Woods, derived from original 32228753Smm * work by Spencer Thomas and Joseph Orost. 33228753Smm * 34228753Smm * Redistribution and use in source and binary forms, with or without 35228753Smm * modification, are permitted provided that the following conditions 36228753Smm * are met: 37228753Smm * 1. Redistributions of source code must retain the above copyright 38228753Smm * notice, this list of conditions and the following disclaimer. 39228753Smm * 2. Redistributions in binary form must reproduce the above copyright 40228753Smm * notice, this list of conditions and the following disclaimer in the 41228753Smm * documentation and/or other materials provided with the distribution. 42228753Smm * 3. Neither the name of the University nor the names of its contributors 43228753Smm * may be used to endorse or promote products derived from this software 44228753Smm * without specific prior written permission. 45228753Smm * 46228753Smm * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 47228753Smm * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 48228753Smm * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 49228753Smm * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 50228753Smm * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 51228753Smm * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 52228753Smm * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 53228753Smm * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 54228753Smm * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 55228753Smm * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 56228753Smm * SUCH DAMAGE. 57228753Smm */ 58228753Smm 59228753Smm#include "archive_platform.h" 60228753Smm 61229592Smm__FBSDID("$FreeBSD$"); 62228753Smm 63228753Smm#ifdef HAVE_ERRNO_H 64228753Smm#include <errno.h> 65228753Smm#endif 66228753Smm#ifdef HAVE_STDLIB_H 67228753Smm#include <stdlib.h> 68228753Smm#endif 69228753Smm#ifdef HAVE_STRING_H 70228753Smm#include <string.h> 71228753Smm#endif 72228753Smm 73228753Smm#include "archive.h" 74228753Smm#include "archive_private.h" 75228753Smm#include "archive_write_private.h" 76228753Smm 77228753Smm#define HSIZE 69001 /* 95% occupancy */ 78228753Smm#define HSHIFT 8 /* 8 - trunc(log2(HSIZE / 65536)) */ 79228753Smm#define CHECK_GAP 10000 /* Ratio check interval. */ 80228753Smm 81228753Smm#define MAXCODE(bits) ((1 << (bits)) - 1) 82228753Smm 83228753Smm/* 84228753Smm * the next two codes should not be changed lightly, as they must not 85228753Smm * lie within the contiguous general code space. 86228753Smm */ 87228753Smm#define FIRST 257 /* First free entry. */ 88228753Smm#define CLEAR 256 /* Table clear output code. */ 89228753Smm 90228753Smmstruct private_data { 91228753Smm off_t in_count, out_count, checkpoint; 92228753Smm 93228753Smm int code_len; /* Number of bits/code. */ 94228753Smm int cur_maxcode; /* Maximum code, given n_bits. */ 95228753Smm int max_maxcode; /* Should NEVER generate this code. */ 96228753Smm int hashtab [HSIZE]; 97228753Smm unsigned short codetab [HSIZE]; 98228753Smm int first_free; /* First unused entry. */ 99228753Smm int compress_ratio; 100228753Smm 101228753Smm int cur_code, cur_fcode; 102228753Smm 103228753Smm int bit_offset; 104228753Smm unsigned char bit_buf; 105228753Smm 106228753Smm unsigned char *compressed; 107228753Smm size_t compressed_buffer_size; 108228753Smm size_t compressed_offset; 109228753Smm}; 110228753Smm 111228753Smmstatic int archive_compressor_compress_finish(struct archive_write *); 112228753Smmstatic int archive_compressor_compress_init(struct archive_write *); 113228753Smmstatic int archive_compressor_compress_write(struct archive_write *, 114228753Smm const void *, size_t); 115228753Smm 116228753Smm/* 117228753Smm * Allocate, initialize and return a archive object. 118228753Smm */ 119228753Smmint 120228753Smmarchive_write_set_compression_compress(struct archive *_a) 121228753Smm{ 122228753Smm struct archive_write *a = (struct archive_write *)_a; 123228753Smm __archive_check_magic(&a->archive, ARCHIVE_WRITE_MAGIC, 124228753Smm ARCHIVE_STATE_NEW, "archive_write_set_compression_compress"); 125228753Smm a->compressor.init = &archive_compressor_compress_init; 126228753Smm a->archive.compression_code = ARCHIVE_COMPRESSION_COMPRESS; 127228753Smm a->archive.compression_name = "compress"; 128228753Smm return (ARCHIVE_OK); 129228753Smm} 130228753Smm 131228753Smm/* 132228753Smm * Setup callback. 133228753Smm */ 134228753Smmstatic int 135228753Smmarchive_compressor_compress_init(struct archive_write *a) 136228753Smm{ 137228753Smm int ret; 138228753Smm struct private_data *state; 139228753Smm 140228753Smm a->archive.compression_code = ARCHIVE_COMPRESSION_COMPRESS; 141228753Smm a->archive.compression_name = "compress"; 142228753Smm 143228753Smm if (a->bytes_per_block < 4) { 144228753Smm archive_set_error(&a->archive, EINVAL, 145228753Smm "Can't write Compress header as single block"); 146228753Smm return (ARCHIVE_FATAL); 147228753Smm } 148228753Smm 149228753Smm if (a->client_opener != NULL) { 150228753Smm ret = (a->client_opener)(&a->archive, a->client_data); 151228753Smm if (ret != ARCHIVE_OK) 152228753Smm return (ret); 153228753Smm } 154228753Smm 155228753Smm state = (struct private_data *)malloc(sizeof(*state)); 156228753Smm if (state == NULL) { 157228753Smm archive_set_error(&a->archive, ENOMEM, 158228753Smm "Can't allocate data for compression"); 159228753Smm return (ARCHIVE_FATAL); 160228753Smm } 161228753Smm memset(state, 0, sizeof(*state)); 162228753Smm 163228753Smm state->compressed_buffer_size = a->bytes_per_block; 164228753Smm state->compressed = malloc(state->compressed_buffer_size); 165228753Smm 166228753Smm if (state->compressed == NULL) { 167228753Smm archive_set_error(&a->archive, ENOMEM, 168228753Smm "Can't allocate data for compression buffer"); 169228753Smm free(state); 170228753Smm return (ARCHIVE_FATAL); 171228753Smm } 172228753Smm 173228753Smm a->compressor.write = archive_compressor_compress_write; 174228753Smm a->compressor.finish = archive_compressor_compress_finish; 175228753Smm 176228753Smm state->max_maxcode = 0x10000; /* Should NEVER generate this code. */ 177228753Smm state->in_count = 0; /* Length of input. */ 178228753Smm state->bit_buf = 0; 179228753Smm state->bit_offset = 0; 180228753Smm state->out_count = 3; /* Includes 3-byte header mojo. */ 181228753Smm state->compress_ratio = 0; 182228753Smm state->checkpoint = CHECK_GAP; 183228753Smm state->code_len = 9; 184228753Smm state->cur_maxcode = MAXCODE(state->code_len); 185228753Smm state->first_free = FIRST; 186228753Smm 187228753Smm memset(state->hashtab, 0xff, sizeof(state->hashtab)); 188228753Smm 189228753Smm /* Prime output buffer with a gzip header. */ 190228753Smm state->compressed[0] = 0x1f; /* Compress */ 191228753Smm state->compressed[1] = 0x9d; 192228753Smm state->compressed[2] = 0x90; /* Block mode, 16bit max */ 193228753Smm state->compressed_offset = 3; 194228753Smm 195228753Smm a->compressor.data = state; 196228753Smm return (0); 197228753Smm} 198228753Smm 199228753Smm/*- 200228753Smm * Output the given code. 201228753Smm * Inputs: 202228753Smm * code: A n_bits-bit integer. If == -1, then EOF. This assumes 203228753Smm * that n_bits =< (long)wordsize - 1. 204228753Smm * Outputs: 205228753Smm * Outputs code to the file. 206228753Smm * Assumptions: 207228753Smm * Chars are 8 bits long. 208228753Smm * Algorithm: 209228753Smm * Maintain a BITS character long buffer (so that 8 codes will 210228753Smm * fit in it exactly). Use the VAX insv instruction to insert each 211228753Smm * code in turn. When the buffer fills up empty it and start over. 212228753Smm */ 213228753Smm 214228753Smmstatic unsigned char rmask[9] = 215228753Smm {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; 216228753Smm 217228753Smmstatic int 218228753Smmoutput_byte(struct archive_write *a, unsigned char c) 219228753Smm{ 220228753Smm struct private_data *state = a->compressor.data; 221228753Smm ssize_t bytes_written; 222228753Smm 223228753Smm state->compressed[state->compressed_offset++] = c; 224228753Smm ++state->out_count; 225228753Smm 226228753Smm if (state->compressed_buffer_size == state->compressed_offset) { 227228753Smm bytes_written = (a->client_writer)(&a->archive, 228228753Smm a->client_data, 229228753Smm state->compressed, state->compressed_buffer_size); 230228753Smm if (bytes_written <= 0) 231228753Smm return ARCHIVE_FATAL; 232228753Smm a->archive.raw_position += bytes_written; 233228753Smm state->compressed_offset = 0; 234228753Smm } 235228753Smm 236228753Smm return ARCHIVE_OK; 237228753Smm} 238228753Smm 239228753Smmstatic int 240228753Smmoutput_code(struct archive_write *a, int ocode) 241228753Smm{ 242228753Smm struct private_data *state = a->compressor.data; 243228753Smm int bits, ret, clear_flg, bit_offset; 244228753Smm 245228753Smm clear_flg = ocode == CLEAR; 246228753Smm 247228753Smm /* 248228753Smm * Since ocode is always >= 8 bits, only need to mask the first 249228753Smm * hunk on the left. 250228753Smm */ 251228753Smm bit_offset = state->bit_offset % 8; 252228753Smm state->bit_buf |= (ocode << bit_offset) & 0xff; 253228753Smm output_byte(a, state->bit_buf); 254228753Smm 255228753Smm bits = state->code_len - (8 - bit_offset); 256228753Smm ocode >>= 8 - bit_offset; 257228753Smm /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ 258228753Smm if (bits >= 8) { 259228753Smm output_byte(a, ocode & 0xff); 260228753Smm ocode >>= 8; 261228753Smm bits -= 8; 262228753Smm } 263228753Smm /* Last bits. */ 264228753Smm state->bit_offset += state->code_len; 265228753Smm state->bit_buf = ocode & rmask[bits]; 266228753Smm if (state->bit_offset == state->code_len * 8) 267228753Smm state->bit_offset = 0; 268228753Smm 269228753Smm /* 270228753Smm * If the next entry is going to be too big for the ocode size, 271228753Smm * then increase it, if possible. 272228753Smm */ 273228753Smm if (clear_flg || state->first_free > state->cur_maxcode) { 274228753Smm /* 275228753Smm * Write the whole buffer, because the input side won't 276228753Smm * discover the size increase until after it has read it. 277228753Smm */ 278228753Smm if (state->bit_offset > 0) { 279228753Smm while (state->bit_offset < state->code_len * 8) { 280228753Smm ret = output_byte(a, state->bit_buf); 281228753Smm if (ret != ARCHIVE_OK) 282228753Smm return ret; 283228753Smm state->bit_offset += 8; 284228753Smm state->bit_buf = 0; 285228753Smm } 286228753Smm } 287228753Smm state->bit_buf = 0; 288228753Smm state->bit_offset = 0; 289228753Smm 290228753Smm if (clear_flg) { 291228753Smm state->code_len = 9; 292228753Smm state->cur_maxcode = MAXCODE(state->code_len); 293228753Smm } else { 294228753Smm state->code_len++; 295228753Smm if (state->code_len == 16) 296228753Smm state->cur_maxcode = state->max_maxcode; 297228753Smm else 298228753Smm state->cur_maxcode = MAXCODE(state->code_len); 299228753Smm } 300228753Smm } 301228753Smm 302228753Smm return (ARCHIVE_OK); 303228753Smm} 304228753Smm 305228753Smmstatic int 306228753Smmoutput_flush(struct archive_write *a) 307228753Smm{ 308228753Smm struct private_data *state = a->compressor.data; 309228753Smm int ret; 310228753Smm 311228753Smm /* At EOF, write the rest of the buffer. */ 312228753Smm if (state->bit_offset % 8) { 313228753Smm state->code_len = (state->bit_offset % 8 + 7) / 8; 314228753Smm ret = output_byte(a, state->bit_buf); 315228753Smm if (ret != ARCHIVE_OK) 316228753Smm return ret; 317228753Smm } 318228753Smm 319228753Smm return (ARCHIVE_OK); 320228753Smm} 321228753Smm 322228753Smm/* 323228753Smm * Write data to the compressed stream. 324228753Smm */ 325228753Smmstatic int 326228753Smmarchive_compressor_compress_write(struct archive_write *a, const void *buff, 327228753Smm size_t length) 328228753Smm{ 329228753Smm struct private_data *state; 330228753Smm int i; 331228753Smm int ratio; 332228753Smm int c, disp, ret; 333228753Smm const unsigned char *bp; 334228753Smm 335228753Smm state = (struct private_data *)a->compressor.data; 336228753Smm if (a->client_writer == NULL) { 337228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 338228753Smm "No write callback is registered? " 339228753Smm "This is probably an internal programming error."); 340228753Smm return (ARCHIVE_FATAL); 341228753Smm } 342228753Smm 343228753Smm if (length == 0) 344228753Smm return ARCHIVE_OK; 345228753Smm 346228753Smm bp = buff; 347228753Smm 348228753Smm if (state->in_count == 0) { 349228753Smm state->cur_code = *bp++; 350228753Smm ++state->in_count; 351228753Smm --length; 352228753Smm } 353228753Smm 354228753Smm while (length--) { 355228753Smm c = *bp++; 356228753Smm state->in_count++; 357228753Smm state->cur_fcode = (c << 16) + state->cur_code; 358228753Smm i = ((c << HSHIFT) ^ state->cur_code); /* Xor hashing. */ 359228753Smm 360228753Smm if (state->hashtab[i] == state->cur_fcode) { 361228753Smm state->cur_code = state->codetab[i]; 362228753Smm continue; 363228753Smm } 364228753Smm if (state->hashtab[i] < 0) /* Empty slot. */ 365228753Smm goto nomatch; 366228753Smm /* Secondary hash (after G. Knott). */ 367228753Smm if (i == 0) 368228753Smm disp = 1; 369228753Smm else 370228753Smm disp = HSIZE - i; 371228753Smm probe: 372228753Smm if ((i -= disp) < 0) 373228753Smm i += HSIZE; 374228753Smm 375228753Smm if (state->hashtab[i] == state->cur_fcode) { 376228753Smm state->cur_code = state->codetab[i]; 377228753Smm continue; 378228753Smm } 379228753Smm if (state->hashtab[i] >= 0) 380228753Smm goto probe; 381228753Smm nomatch: 382228753Smm ret = output_code(a, state->cur_code); 383228753Smm if (ret != ARCHIVE_OK) 384228753Smm return ret; 385228753Smm state->cur_code = c; 386228753Smm if (state->first_free < state->max_maxcode) { 387228753Smm state->codetab[i] = state->first_free++; /* code -> hashtable */ 388228753Smm state->hashtab[i] = state->cur_fcode; 389228753Smm continue; 390228753Smm } 391228753Smm if (state->in_count < state->checkpoint) 392228753Smm continue; 393228753Smm 394228753Smm state->checkpoint = state->in_count + CHECK_GAP; 395228753Smm 396228753Smm if (state->in_count <= 0x007fffff) 397228753Smm ratio = state->in_count * 256 / state->out_count; 398228753Smm else if ((ratio = state->out_count / 256) == 0) 399228753Smm ratio = 0x7fffffff; 400228753Smm else 401228753Smm ratio = state->in_count / ratio; 402228753Smm 403228753Smm if (ratio > state->compress_ratio) 404228753Smm state->compress_ratio = ratio; 405228753Smm else { 406228753Smm state->compress_ratio = 0; 407228753Smm memset(state->hashtab, 0xff, sizeof(state->hashtab)); 408228753Smm state->first_free = FIRST; 409228753Smm ret = output_code(a, CLEAR); 410228753Smm if (ret != ARCHIVE_OK) 411228753Smm return ret; 412228753Smm } 413228753Smm } 414228753Smm 415228753Smm return (ARCHIVE_OK); 416228753Smm} 417228753Smm 418228753Smm 419228753Smm/* 420228753Smm * Finish the compression... 421228753Smm */ 422228753Smmstatic int 423228753Smmarchive_compressor_compress_finish(struct archive_write *a) 424228753Smm{ 425228753Smm ssize_t block_length, target_block_length, bytes_written; 426228753Smm int ret; 427228753Smm struct private_data *state; 428228753Smm size_t tocopy; 429228753Smm 430228753Smm state = (struct private_data *)a->compressor.data; 431228753Smm if (a->client_writer == NULL) { 432228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 433228753Smm "No write callback is registered? " 434228753Smm "This is probably an internal programming error."); 435228753Smm ret = ARCHIVE_FATAL; 436228753Smm goto cleanup; 437228753Smm } 438228753Smm 439228753Smm /* By default, always pad the uncompressed data. */ 440228753Smm if (a->pad_uncompressed) { 441228753Smm while (state->in_count % a->bytes_per_block != 0) { 442228753Smm tocopy = a->bytes_per_block - 443228753Smm (state->in_count % a->bytes_per_block); 444228753Smm if (tocopy > a->null_length) 445228753Smm tocopy = a->null_length; 446228753Smm ret = archive_compressor_compress_write(a, a->nulls, 447228753Smm tocopy); 448228753Smm if (ret != ARCHIVE_OK) 449228753Smm goto cleanup; 450228753Smm } 451228753Smm } 452228753Smm 453228753Smm ret = output_code(a, state->cur_code); 454228753Smm if (ret != ARCHIVE_OK) 455228753Smm goto cleanup; 456228753Smm ret = output_flush(a); 457228753Smm if (ret != ARCHIVE_OK) 458228753Smm goto cleanup; 459228753Smm 460228753Smm /* Optionally, pad the final compressed block. */ 461228753Smm block_length = state->compressed_offset; 462228753Smm 463228753Smm /* Tricky calculation to determine size of last block. */ 464228753Smm if (a->bytes_in_last_block <= 0) 465228753Smm /* Default or Zero: pad to full block */ 466228753Smm target_block_length = a->bytes_per_block; 467228753Smm else 468228753Smm /* Round length to next multiple of bytes_in_last_block. */ 469228753Smm target_block_length = a->bytes_in_last_block * 470228753Smm ( (block_length + a->bytes_in_last_block - 1) / 471228753Smm a->bytes_in_last_block); 472228753Smm if (target_block_length > a->bytes_per_block) 473228753Smm target_block_length = a->bytes_per_block; 474228753Smm if (block_length < target_block_length) { 475228753Smm memset(state->compressed + state->compressed_offset, 0, 476228753Smm target_block_length - block_length); 477228753Smm block_length = target_block_length; 478228753Smm } 479228753Smm 480228753Smm /* Write the last block */ 481228753Smm bytes_written = (a->client_writer)(&a->archive, a->client_data, 482228753Smm state->compressed, block_length); 483228753Smm if (bytes_written <= 0) 484228753Smm ret = ARCHIVE_FATAL; 485228753Smm else 486228753Smm a->archive.raw_position += bytes_written; 487228753Smm 488228753Smmcleanup: 489228753Smm free(state->compressed); 490228753Smm free(state); 491228753Smm return (ret); 492228753Smm} 493