archive_write_set_compression_compress.c revision 228753
1133819Stjr/*- 2230132Suqs * Copyright (c) 2008 Joerg Sonnenberger 3133819Stjr * All rights reserved. 4133819Stjr * 5133819Stjr * Redistribution and use in source and binary forms, with or without 6133819Stjr * modification, are permitted provided that the following conditions 7133819Stjr * are met: 8133819Stjr * 1. Redistributions of source code must retain the above copyright 9133819Stjr * notice, this list of conditions and the following disclaimer. 10133819Stjr * 2. Redistributions in binary form must reproduce the above copyright 11133819Stjr * notice, this list of conditions and the following disclaimer in the 12133819Stjr * documentation and/or other materials provided with the distribution. 13133819Stjr * 14133819Stjr * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15133819Stjr * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16133819Stjr * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17133819Stjr * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18133819Stjr * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19133819Stjr * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20133819Stjr * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21133819Stjr * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22133819Stjr * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23133819Stjr * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24133819Stjr */ 25133819Stjr 26133819Stjr/*- 27133819Stjr * Copyright (c) 1985, 1986, 1992, 1993 28133819Stjr * The Regents of the University of California. All rights reserved. 29133819Stjr * 30133819Stjr * This code is derived from software contributed to Berkeley by 31133819Stjr * Diomidis Spinellis and James A. Woods, derived from original 32235063Snetchild * work by Spencer Thomas and Joseph Orost. 33235063Snetchild * 34235063Snetchild * Redistribution and use in source and binary forms, with or without 35133819Stjr * modification, are permitted provided that the following conditions 36235063Snetchild * are met: 37235063Snetchild * 1. Redistributions of source code must retain the above copyright 38133819Stjr * notice, this list of conditions and the following disclaimer. 39133819Stjr * 2. Redistributions in binary form must reproduce the above copyright 40133819Stjr * notice, this list of conditions and the following disclaimer in the 41133819Stjr * documentation and/or other materials provided with the distribution. 42133819Stjr * 3. Neither the name of the University nor the names of its contributors 43235063Snetchild * may be used to endorse or promote products derived from this software 44133819Stjr * without specific prior written permission. 45133819Stjr * 46235063Snetchild * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 47235063Snetchild * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 48235063Snetchild * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 49133819Stjr * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 50133819Stjr * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 51133819Stjr * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 52133819Stjr * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 53133819Stjr * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 54133819Stjr * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 55133819Stjr * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 56133819Stjr * SUCH DAMAGE. 57133819Stjr */ 58133819Stjr 59133819Stjr#include "archive_platform.h" 60133819Stjr 61133819Stjr__FBSDID("$FreeBSD: head/lib/libarchive/archive_write_set_compression_compress.c 201111 2009-12-28 03:33:05Z kientzle $"); 62133819Stjr 63133819Stjr#ifdef HAVE_ERRNO_H 64133819Stjr#include <errno.h> 65133819Stjr#endif 66133819Stjr#ifdef HAVE_STDLIB_H 67133819Stjr#include <stdlib.h> 68133819Stjr#endif 69133819Stjr#ifdef HAVE_STRING_H 70133819Stjr#include <string.h> 71133819Stjr#endif 72159824Snetchild 73255675Srdivacky#include "archive.h" 74255675Srdivacky#include "archive_private.h" 75255675Srdivacky#include "archive_write_private.h" 76159824Snetchild 77159824Snetchild#define HSIZE 69001 /* 95% occupancy */ 78159824Snetchild#define HSHIFT 8 /* 8 - trunc(log2(HSIZE / 65536)) */ 79159824Snetchild#define CHECK_GAP 10000 /* Ratio check interval. */ 80159824Snetchild 81159824Snetchild#define MAXCODE(bits) ((1 << (bits)) - 1) 82159824Snetchild 83159824Snetchild/* 84159824Snetchild * the next two codes should not be changed lightly, as they must not 85159824Snetchild * lie within the contiguous general code space. 86159824Snetchild */ 87159824Snetchild#define FIRST 257 /* First free entry. */ 88159824Snetchild#define CLEAR 256 /* Table clear output code. */ 89232799Snetchild 90159824Snetchildstruct private_data { 91159824Snetchild off_t in_count, out_count, checkpoint; 92159824Snetchild 93232799Snetchild int code_len; /* Number of bits/code. */ 94159824Snetchild int cur_maxcode; /* Maximum code, given n_bits. */ 95159824Snetchild int max_maxcode; /* Should NEVER generate this code. */ 96159824Snetchild int hashtab [HSIZE]; 97159824Snetchild unsigned short codetab [HSIZE]; 98159824Snetchild int first_free; /* First unused entry. */ 99232799Snetchild int compress_ratio; 100159824Snetchild 101159824Snetchild int cur_code, cur_fcode; 102159824Snetchild 103159824Snetchild int bit_offset; 104232799Snetchild unsigned char bit_buf; 105178257Sjkim 106178257Sjkim unsigned char *compressed; 107178257Sjkim size_t compressed_buffer_size; 108178257Sjkim size_t compressed_offset; 109232799Snetchild}; 110232799Snetchild 111232799Snetchildstatic int archive_compressor_compress_finish(struct archive_write *); 112232799Snetchildstatic int archive_compressor_compress_init(struct archive_write *); 113232799Snetchildstatic int archive_compressor_compress_write(struct archive_write *, 114232799Snetchild const void *, size_t); 115232799Snetchild 116232799Snetchild/* 117232799Snetchild * Allocate, initialize and return a archive object. 118232799Snetchild */ 119232799Snetchildint 120232799Snetchildarchive_write_set_compression_compress(struct archive *_a) 121232799Snetchild{ 122232799Snetchild struct archive_write *a = (struct archive_write *)_a; 123232799Snetchild __archive_check_magic(&a->archive, ARCHIVE_WRITE_MAGIC, 124232799Snetchild ARCHIVE_STATE_NEW, "archive_write_set_compression_compress"); 125232799Snetchild a->compressor.init = &archive_compressor_compress_init; 126232799Snetchild a->archive.compression_code = ARCHIVE_COMPRESSION_COMPRESS; 127255675Srdivacky a->archive.compression_name = "compress"; 128232799Snetchild return (ARCHIVE_OK); 129232799Snetchild} 130232799Snetchild 131232799Snetchild/* 132232799Snetchild * Setup callback. 133232799Snetchild */ 134232799Snetchildstatic int 135232799Snetchildarchive_compressor_compress_init(struct archive_write *a) 136232799Snetchild{ 137232799Snetchild int ret; 138232799Snetchild struct private_data *state; 139232799Snetchild 140232799Snetchild a->archive.compression_code = ARCHIVE_COMPRESSION_COMPRESS; 141232799Snetchild a->archive.compression_name = "compress"; 142232799Snetchild 143232799Snetchild if (a->bytes_per_block < 4) { 144232799Snetchild archive_set_error(&a->archive, EINVAL, 145232799Snetchild "Can't write Compress header as single block"); 146232799Snetchild return (ARCHIVE_FATAL); 147232799Snetchild } 148232799Snetchild 149232799Snetchild if (a->client_opener != NULL) { 150232799Snetchild ret = (a->client_opener)(&a->archive, a->client_data); 151133819Stjr if (ret != ARCHIVE_OK) 152133819Stjr return (ret); 153133819Stjr } 154133819Stjr 155133819Stjr state = (struct private_data *)malloc(sizeof(*state)); 156133819Stjr if (state == NULL) { 157133819Stjr archive_set_error(&a->archive, ENOMEM, 158133819Stjr "Can't allocate data for compression"); 159133819Stjr return (ARCHIVE_FATAL); 160133819Stjr } 161133819Stjr memset(state, 0, sizeof(*state)); 162133819Stjr 163133819Stjr state->compressed_buffer_size = a->bytes_per_block; 164133819Stjr state->compressed = malloc(state->compressed_buffer_size); 165133819Stjr 166133819Stjr if (state->compressed == NULL) { 167133819Stjr archive_set_error(&a->archive, ENOMEM, 168133819Stjr "Can't allocate data for compression buffer"); 169133819Stjr free(state); 170133819Stjr return (ARCHIVE_FATAL); 171133819Stjr } 172 173 a->compressor.write = archive_compressor_compress_write; 174 a->compressor.finish = archive_compressor_compress_finish; 175 176 state->max_maxcode = 0x10000; /* Should NEVER generate this code. */ 177 state->in_count = 0; /* Length of input. */ 178 state->bit_buf = 0; 179 state->bit_offset = 0; 180 state->out_count = 3; /* Includes 3-byte header mojo. */ 181 state->compress_ratio = 0; 182 state->checkpoint = CHECK_GAP; 183 state->code_len = 9; 184 state->cur_maxcode = MAXCODE(state->code_len); 185 state->first_free = FIRST; 186 187 memset(state->hashtab, 0xff, sizeof(state->hashtab)); 188 189 /* Prime output buffer with a gzip header. */ 190 state->compressed[0] = 0x1f; /* Compress */ 191 state->compressed[1] = 0x9d; 192 state->compressed[2] = 0x90; /* Block mode, 16bit max */ 193 state->compressed_offset = 3; 194 195 a->compressor.data = state; 196 return (0); 197} 198 199/*- 200 * Output the given code. 201 * Inputs: 202 * code: A n_bits-bit integer. If == -1, then EOF. This assumes 203 * that n_bits =< (long)wordsize - 1. 204 * Outputs: 205 * Outputs code to the file. 206 * Assumptions: 207 * Chars are 8 bits long. 208 * Algorithm: 209 * Maintain a BITS character long buffer (so that 8 codes will 210 * fit in it exactly). Use the VAX insv instruction to insert each 211 * code in turn. When the buffer fills up empty it and start over. 212 */ 213 214static unsigned char rmask[9] = 215 {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; 216 217static int 218output_byte(struct archive_write *a, unsigned char c) 219{ 220 struct private_data *state = a->compressor.data; 221 ssize_t bytes_written; 222 223 state->compressed[state->compressed_offset++] = c; 224 ++state->out_count; 225 226 if (state->compressed_buffer_size == state->compressed_offset) { 227 bytes_written = (a->client_writer)(&a->archive, 228 a->client_data, 229 state->compressed, state->compressed_buffer_size); 230 if (bytes_written <= 0) 231 return ARCHIVE_FATAL; 232 a->archive.raw_position += bytes_written; 233 state->compressed_offset = 0; 234 } 235 236 return ARCHIVE_OK; 237} 238 239static int 240output_code(struct archive_write *a, int ocode) 241{ 242 struct private_data *state = a->compressor.data; 243 int bits, ret, clear_flg, bit_offset; 244 245 clear_flg = ocode == CLEAR; 246 247 /* 248 * Since ocode is always >= 8 bits, only need to mask the first 249 * hunk on the left. 250 */ 251 bit_offset = state->bit_offset % 8; 252 state->bit_buf |= (ocode << bit_offset) & 0xff; 253 output_byte(a, state->bit_buf); 254 255 bits = state->code_len - (8 - bit_offset); 256 ocode >>= 8 - bit_offset; 257 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ 258 if (bits >= 8) { 259 output_byte(a, ocode & 0xff); 260 ocode >>= 8; 261 bits -= 8; 262 } 263 /* Last bits. */ 264 state->bit_offset += state->code_len; 265 state->bit_buf = ocode & rmask[bits]; 266 if (state->bit_offset == state->code_len * 8) 267 state->bit_offset = 0; 268 269 /* 270 * If the next entry is going to be too big for the ocode size, 271 * then increase it, if possible. 272 */ 273 if (clear_flg || state->first_free > state->cur_maxcode) { 274 /* 275 * Write the whole buffer, because the input side won't 276 * discover the size increase until after it has read it. 277 */ 278 if (state->bit_offset > 0) { 279 while (state->bit_offset < state->code_len * 8) { 280 ret = output_byte(a, state->bit_buf); 281 if (ret != ARCHIVE_OK) 282 return ret; 283 state->bit_offset += 8; 284 state->bit_buf = 0; 285 } 286 } 287 state->bit_buf = 0; 288 state->bit_offset = 0; 289 290 if (clear_flg) { 291 state->code_len = 9; 292 state->cur_maxcode = MAXCODE(state->code_len); 293 } else { 294 state->code_len++; 295 if (state->code_len == 16) 296 state->cur_maxcode = state->max_maxcode; 297 else 298 state->cur_maxcode = MAXCODE(state->code_len); 299 } 300 } 301 302 return (ARCHIVE_OK); 303} 304 305static int 306output_flush(struct archive_write *a) 307{ 308 struct private_data *state = a->compressor.data; 309 int ret; 310 311 /* At EOF, write the rest of the buffer. */ 312 if (state->bit_offset % 8) { 313 state->code_len = (state->bit_offset % 8 + 7) / 8; 314 ret = output_byte(a, state->bit_buf); 315 if (ret != ARCHIVE_OK) 316 return ret; 317 } 318 319 return (ARCHIVE_OK); 320} 321 322/* 323 * Write data to the compressed stream. 324 */ 325static int 326archive_compressor_compress_write(struct archive_write *a, const void *buff, 327 size_t length) 328{ 329 struct private_data *state; 330 int i; 331 int ratio; 332 int c, disp, ret; 333 const unsigned char *bp; 334 335 state = (struct private_data *)a->compressor.data; 336 if (a->client_writer == NULL) { 337 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 338 "No write callback is registered? " 339 "This is probably an internal programming error."); 340 return (ARCHIVE_FATAL); 341 } 342 343 if (length == 0) 344 return ARCHIVE_OK; 345 346 bp = buff; 347 348 if (state->in_count == 0) { 349 state->cur_code = *bp++; 350 ++state->in_count; 351 --length; 352 } 353 354 while (length--) { 355 c = *bp++; 356 state->in_count++; 357 state->cur_fcode = (c << 16) + state->cur_code; 358 i = ((c << HSHIFT) ^ state->cur_code); /* Xor hashing. */ 359 360 if (state->hashtab[i] == state->cur_fcode) { 361 state->cur_code = state->codetab[i]; 362 continue; 363 } 364 if (state->hashtab[i] < 0) /* Empty slot. */ 365 goto nomatch; 366 /* Secondary hash (after G. Knott). */ 367 if (i == 0) 368 disp = 1; 369 else 370 disp = HSIZE - i; 371 probe: 372 if ((i -= disp) < 0) 373 i += HSIZE; 374 375 if (state->hashtab[i] == state->cur_fcode) { 376 state->cur_code = state->codetab[i]; 377 continue; 378 } 379 if (state->hashtab[i] >= 0) 380 goto probe; 381 nomatch: 382 ret = output_code(a, state->cur_code); 383 if (ret != ARCHIVE_OK) 384 return ret; 385 state->cur_code = c; 386 if (state->first_free < state->max_maxcode) { 387 state->codetab[i] = state->first_free++; /* code -> hashtable */ 388 state->hashtab[i] = state->cur_fcode; 389 continue; 390 } 391 if (state->in_count < state->checkpoint) 392 continue; 393 394 state->checkpoint = state->in_count + CHECK_GAP; 395 396 if (state->in_count <= 0x007fffff) 397 ratio = state->in_count * 256 / state->out_count; 398 else if ((ratio = state->out_count / 256) == 0) 399 ratio = 0x7fffffff; 400 else 401 ratio = state->in_count / ratio; 402 403 if (ratio > state->compress_ratio) 404 state->compress_ratio = ratio; 405 else { 406 state->compress_ratio = 0; 407 memset(state->hashtab, 0xff, sizeof(state->hashtab)); 408 state->first_free = FIRST; 409 ret = output_code(a, CLEAR); 410 if (ret != ARCHIVE_OK) 411 return ret; 412 } 413 } 414 415 return (ARCHIVE_OK); 416} 417 418 419/* 420 * Finish the compression... 421 */ 422static int 423archive_compressor_compress_finish(struct archive_write *a) 424{ 425 ssize_t block_length, target_block_length, bytes_written; 426 int ret; 427 struct private_data *state; 428 size_t tocopy; 429 430 state = (struct private_data *)a->compressor.data; 431 if (a->client_writer == NULL) { 432 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 433 "No write callback is registered? " 434 "This is probably an internal programming error."); 435 ret = ARCHIVE_FATAL; 436 goto cleanup; 437 } 438 439 /* By default, always pad the uncompressed data. */ 440 if (a->pad_uncompressed) { 441 while (state->in_count % a->bytes_per_block != 0) { 442 tocopy = a->bytes_per_block - 443 (state->in_count % a->bytes_per_block); 444 if (tocopy > a->null_length) 445 tocopy = a->null_length; 446 ret = archive_compressor_compress_write(a, a->nulls, 447 tocopy); 448 if (ret != ARCHIVE_OK) 449 goto cleanup; 450 } 451 } 452 453 ret = output_code(a, state->cur_code); 454 if (ret != ARCHIVE_OK) 455 goto cleanup; 456 ret = output_flush(a); 457 if (ret != ARCHIVE_OK) 458 goto cleanup; 459 460 /* Optionally, pad the final compressed block. */ 461 block_length = state->compressed_offset; 462 463 /* Tricky calculation to determine size of last block. */ 464 if (a->bytes_in_last_block <= 0) 465 /* Default or Zero: pad to full block */ 466 target_block_length = a->bytes_per_block; 467 else 468 /* Round length to next multiple of bytes_in_last_block. */ 469 target_block_length = a->bytes_in_last_block * 470 ( (block_length + a->bytes_in_last_block - 1) / 471 a->bytes_in_last_block); 472 if (target_block_length > a->bytes_per_block) 473 target_block_length = a->bytes_per_block; 474 if (block_length < target_block_length) { 475 memset(state->compressed + state->compressed_offset, 0, 476 target_block_length - block_length); 477 block_length = target_block_length; 478 } 479 480 /* Write the last block */ 481 bytes_written = (a->client_writer)(&a->archive, a->client_data, 482 state->compressed, block_length); 483 if (bytes_written <= 0) 484 ret = ARCHIVE_FATAL; 485 else 486 a->archive.raw_position += bytes_written; 487 488cleanup: 489 free(state->compressed); 490 free(state); 491 return (ret); 492} 493