zopen.c revision 92920
11590Srgrimes/*- 21590Srgrimes * Copyright (c) 1985, 1986, 1992, 1993 31590Srgrimes * The Regents of the University of California. All rights reserved. 41590Srgrimes * 51590Srgrimes * This code is derived from software contributed to Berkeley by 61590Srgrimes * Diomidis Spinellis and James A. Woods, derived from original 71590Srgrimes * work by Spencer Thomas and Joseph Orost. 81590Srgrimes * 91590Srgrimes * Redistribution and use in source and binary forms, with or without 101590Srgrimes * modification, are permitted provided that the following conditions 111590Srgrimes * are met: 121590Srgrimes * 1. Redistributions of source code must retain the above copyright 131590Srgrimes * notice, this list of conditions and the following disclaimer. 141590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 151590Srgrimes * notice, this list of conditions and the following disclaimer in the 161590Srgrimes * documentation and/or other materials provided with the distribution. 171590Srgrimes * 3. All advertising materials mentioning features or use of this software 181590Srgrimes * must display the following acknowledgement: 191590Srgrimes * This product includes software developed by the University of 201590Srgrimes * California, Berkeley and its contributors. 211590Srgrimes * 4. Neither the name of the University nor the names of its contributors 221590Srgrimes * may be used to endorse or promote products derived from this software 231590Srgrimes * without specific prior written permission. 241590Srgrimes * 251590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 261590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 271590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 281590Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 291590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 301590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 311590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 321590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 331590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 341590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 351590Srgrimes * SUCH DAMAGE. 361590Srgrimes */ 371590Srgrimes 381590Srgrimes#if defined(LIBC_SCCS) && !defined(lint) 391590Srgrimesstatic char sccsid[] = "@(#)zopen.c 8.1 (Berkeley) 6/27/93"; 401590Srgrimes#endif /* LIBC_SCCS and not lint */ 411590Srgrimes 4287628Sdwmalone#include <sys/cdefs.h> 4387628Sdwmalone__FBSDID("$FreeBSD: head/usr.bin/compress/zopen.c 92920 2002-03-22 01:22:50Z imp $"); 4487628Sdwmalone 451590Srgrimes/*- 461590Srgrimes * fcompress.c - File compression ala IEEE Computer, June 1984. 471590Srgrimes * 481590Srgrimes * Compress authors: 491590Srgrimes * Spencer W. Thomas (decvax!utah-cs!thomas) 501590Srgrimes * Jim McKie (decvax!mcvax!jim) 511590Srgrimes * Steve Davies (decvax!vax135!petsd!peora!srd) 521590Srgrimes * Ken Turkowski (decvax!decwrl!turtlevax!ken) 531590Srgrimes * James A. Woods (decvax!ihnp4!ames!jaw) 541590Srgrimes * Joe Orost (decvax!vax135!petsd!joe) 551590Srgrimes * 561590Srgrimes * Cleaned up and converted to library returning I/O streams by 571590Srgrimes * Diomidis Spinellis <dds@doc.ic.ac.uk>. 581590Srgrimes * 591590Srgrimes * zopen(filename, mode, bits) 601590Srgrimes * Returns a FILE * that can be used for read or write. The modes 611590Srgrimes * supported are only "r" and "w". Seeking is not allowed. On 621590Srgrimes * reading the file is decompressed, on writing it is compressed. 631590Srgrimes * The output is compatible with compress(1) with 16 bit tables. 641590Srgrimes * Any file produced by compress(1) can be read. 651590Srgrimes */ 661590Srgrimes 671590Srgrimes#include <sys/param.h> 681590Srgrimes#include <sys/stat.h> 691590Srgrimes 701590Srgrimes#include <ctype.h> 711590Srgrimes#include <errno.h> 721590Srgrimes#include <signal.h> 731590Srgrimes#include <stdio.h> 741590Srgrimes#include <stdlib.h> 751590Srgrimes#include <string.h> 761590Srgrimes#include <unistd.h> 7717717Swosch#include "zopen.h" 781590Srgrimes 791590Srgrimes#define BITS 16 /* Default bits. */ 801590Srgrimes#define HSIZE 69001 /* 95% occupancy */ 811590Srgrimes 821590Srgrimes/* A code_int must be able to hold 2**BITS values of type int, and also -1. */ 831590Srgrimestypedef long code_int; 841590Srgrimestypedef long count_int; 851590Srgrimes 861590Srgrimestypedef u_char char_type; 871590Srgrimesstatic char_type magic_header[] = 881590Srgrimes {'\037', '\235'}; /* 1F 9D */ 891590Srgrimes 901590Srgrimes#define BIT_MASK 0x1f /* Defines for third byte of header. */ 911590Srgrimes#define BLOCK_MASK 0x80 921590Srgrimes 931590Srgrimes/* 941590Srgrimes * Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is 951590Srgrimes * a fourth header byte (for expansion). 961590Srgrimes */ 971590Srgrimes#define INIT_BITS 9 /* Initial number of bits/code. */ 981590Srgrimes 991590Srgrimes#define MAXCODE(n_bits) ((1 << (n_bits)) - 1) 1001590Srgrimes 1011590Srgrimesstruct s_zstate { 1021590Srgrimes FILE *zs_fp; /* File stream for I/O */ 1031590Srgrimes char zs_mode; /* r or w */ 1041590Srgrimes enum { 1051590Srgrimes S_START, S_MIDDLE, S_EOF 1061590Srgrimes } zs_state; /* State of computation */ 10787247Smarkm u_int zs_n_bits; /* Number of bits/code. */ 10887247Smarkm u_int zs_maxbits; /* User settable max # bits/code. */ 1091590Srgrimes code_int zs_maxcode; /* Maximum code, given n_bits. */ 1101590Srgrimes code_int zs_maxmaxcode; /* Should NEVER generate this code. */ 1111590Srgrimes count_int zs_htab [HSIZE]; 1121590Srgrimes u_short zs_codetab [HSIZE]; 1131590Srgrimes code_int zs_hsize; /* For dynamic table sizing. */ 1141590Srgrimes code_int zs_free_ent; /* First unused entry. */ 1151590Srgrimes /* 1161590Srgrimes * Block compression parameters -- after all codes are used up, 1171590Srgrimes * and compression rate changes, start over. 1181590Srgrimes */ 1191590Srgrimes int zs_block_compress; 1201590Srgrimes int zs_clear_flg; 1211590Srgrimes long zs_ratio; 1221590Srgrimes count_int zs_checkpoint; 12387247Smarkm u_int zs_offset; 1241590Srgrimes long zs_in_count; /* Length of input. */ 1251590Srgrimes long zs_bytes_out; /* Length of compressed output. */ 1261590Srgrimes long zs_out_count; /* # of codes output (for debugging). */ 1271590Srgrimes char_type zs_buf[BITS]; 1281590Srgrimes union { 1291590Srgrimes struct { 1301590Srgrimes long zs_fcode; 1311590Srgrimes code_int zs_ent; 1321590Srgrimes code_int zs_hsize_reg; 1331590Srgrimes int zs_hshift; 1341590Srgrimes } w; /* Write paramenters */ 1351590Srgrimes struct { 1361590Srgrimes char_type *zs_stackp; 1371590Srgrimes int zs_finchar; 1381590Srgrimes code_int zs_code, zs_oldcode, zs_incode; 1391590Srgrimes int zs_roffset, zs_size; 1401590Srgrimes char_type zs_gbuf[BITS]; 1411590Srgrimes } r; /* Read parameters */ 1421590Srgrimes } u; 1431590Srgrimes}; 1441590Srgrimes 1451590Srgrimes/* Definitions to retain old variable names */ 1461590Srgrimes#define fp zs->zs_fp 1471590Srgrimes#define zmode zs->zs_mode 1481590Srgrimes#define state zs->zs_state 1491590Srgrimes#define n_bits zs->zs_n_bits 1501590Srgrimes#define maxbits zs->zs_maxbits 1511590Srgrimes#define maxcode zs->zs_maxcode 1521590Srgrimes#define maxmaxcode zs->zs_maxmaxcode 1531590Srgrimes#define htab zs->zs_htab 1541590Srgrimes#define codetab zs->zs_codetab 1551590Srgrimes#define hsize zs->zs_hsize 1561590Srgrimes#define free_ent zs->zs_free_ent 1571590Srgrimes#define block_compress zs->zs_block_compress 1581590Srgrimes#define clear_flg zs->zs_clear_flg 1591590Srgrimes#define ratio zs->zs_ratio 1601590Srgrimes#define checkpoint zs->zs_checkpoint 1611590Srgrimes#define offset zs->zs_offset 1621590Srgrimes#define in_count zs->zs_in_count 1631590Srgrimes#define bytes_out zs->zs_bytes_out 1641590Srgrimes#define out_count zs->zs_out_count 1651590Srgrimes#define buf zs->zs_buf 1661590Srgrimes#define fcode zs->u.w.zs_fcode 1671590Srgrimes#define hsize_reg zs->u.w.zs_hsize_reg 1681590Srgrimes#define ent zs->u.w.zs_ent 1691590Srgrimes#define hshift zs->u.w.zs_hshift 1701590Srgrimes#define stackp zs->u.r.zs_stackp 1711590Srgrimes#define finchar zs->u.r.zs_finchar 1721590Srgrimes#define code zs->u.r.zs_code 1731590Srgrimes#define oldcode zs->u.r.zs_oldcode 1741590Srgrimes#define incode zs->u.r.zs_incode 1751590Srgrimes#define roffset zs->u.r.zs_roffset 1761590Srgrimes#define size zs->u.r.zs_size 1771590Srgrimes#define gbuf zs->u.r.zs_gbuf 1781590Srgrimes 1791590Srgrimes/* 1801590Srgrimes * To save much memory, we overlay the table used by compress() with those 1811590Srgrimes * used by decompress(). The tab_prefix table is the same size and type as 1821590Srgrimes * the codetab. The tab_suffix table needs 2**BITS characters. We get this 1831590Srgrimes * from the beginning of htab. The output stack uses the rest of htab, and 1841590Srgrimes * contains characters. There is plenty of room for any possible stack 1851590Srgrimes * (stack used to be 8000 characters). 1861590Srgrimes */ 1871590Srgrimes 1881590Srgrimes#define htabof(i) htab[i] 1891590Srgrimes#define codetabof(i) codetab[i] 1901590Srgrimes 1911590Srgrimes#define tab_prefixof(i) codetabof(i) 1921590Srgrimes#define tab_suffixof(i) ((char_type *)(htab))[i] 1931590Srgrimes#define de_stack ((char_type *)&tab_suffixof(1 << BITS)) 1941590Srgrimes 1951590Srgrimes#define CHECK_GAP 10000 /* Ratio check interval. */ 1961590Srgrimes 1971590Srgrimes/* 1981590Srgrimes * the next two codes should not be changed lightly, as they must not 1991590Srgrimes * lie within the contiguous general code space. 2001590Srgrimes */ 2011590Srgrimes#define FIRST 257 /* First free entry. */ 2021590Srgrimes#define CLEAR 256 /* Table clear output code. */ 2031590Srgrimes 20492920Simpstatic int cl_block(struct s_zstate *); 20592920Simpstatic void cl_hash(struct s_zstate *, count_int); 20692920Simpstatic code_int getcode(struct s_zstate *); 20792920Simpstatic int output(struct s_zstate *, code_int); 20892920Simpstatic int zclose(void *); 20992920Simpstatic int zread(void *, char *, int); 21092920Simpstatic int zwrite(void *, const char *, int); 2111590Srgrimes 2121590Srgrimes/*- 2131590Srgrimes * Algorithm from "A Technique for High Performance Data Compression", 2141590Srgrimes * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19. 2151590Srgrimes * 2161590Srgrimes * Algorithm: 2171590Srgrimes * Modified Lempel-Ziv method (LZW). Basically finds common 2181590Srgrimes * substrings and replaces them with a variable size code. This is 2191590Srgrimes * deterministic, and can be done on the fly. Thus, the decompression 2201590Srgrimes * procedure needs no input table, but tracks the way the table was built. 2211590Srgrimes */ 2221590Srgrimes 2231590Srgrimes/*- 2241590Srgrimes * compress write 2251590Srgrimes * 2261590Srgrimes * Algorithm: use open addressing double hashing (no chaining) on the 2271590Srgrimes * prefix code / next character combination. We do a variant of Knuth's 2281590Srgrimes * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime 2291590Srgrimes * secondary probe. Here, the modular division first probe is gives way 2301590Srgrimes * to a faster exclusive-or manipulation. Also do block compression with 2311590Srgrimes * an adaptive reset, whereby the code table is cleared when the compression 2321590Srgrimes * ratio decreases, but after the table fills. The variable-length output 2331590Srgrimes * codes are re-sized at this point, and a special CLEAR code is generated 2341590Srgrimes * for the decompressor. Late addition: construct the table according to 2351590Srgrimes * file size for noticeable speed improvement on small files. Please direct 2361590Srgrimes * questions about this implementation to ames!jaw. 2371590Srgrimes */ 2381590Srgrimesstatic int 2391590Srgrimeszwrite(cookie, wbp, num) 2401590Srgrimes void *cookie; 2411590Srgrimes const char *wbp; 2421590Srgrimes int num; 2431590Srgrimes{ 24487214Smarkm code_int i; 24587214Smarkm int c, disp; 2461590Srgrimes struct s_zstate *zs; 2471590Srgrimes const u_char *bp; 2481590Srgrimes u_char tmp; 2491590Srgrimes int count; 2501590Srgrimes 2511590Srgrimes if (num == 0) 2521590Srgrimes return (0); 2531590Srgrimes 2541590Srgrimes zs = cookie; 2551590Srgrimes count = num; 25687214Smarkm bp = wbp; 2571590Srgrimes if (state == S_MIDDLE) 2581590Srgrimes goto middle; 2591590Srgrimes state = S_MIDDLE; 2601590Srgrimes 2615342Sats maxmaxcode = 1L << maxbits; 2621590Srgrimes if (fwrite(magic_header, 2631590Srgrimes sizeof(char), sizeof(magic_header), fp) != sizeof(magic_header)) 2641590Srgrimes return (-1); 2655302Sjkh tmp = (u_char)((maxbits) | block_compress); 2661590Srgrimes if (fwrite(&tmp, sizeof(char), sizeof(tmp), fp) != sizeof(tmp)) 2671590Srgrimes return (-1); 2681590Srgrimes 2691590Srgrimes offset = 0; 2701590Srgrimes bytes_out = 3; /* Includes 3-byte header mojo. */ 2711590Srgrimes out_count = 0; 2721590Srgrimes clear_flg = 0; 2731590Srgrimes ratio = 0; 2741590Srgrimes in_count = 1; 2751590Srgrimes checkpoint = CHECK_GAP; 2761590Srgrimes maxcode = MAXCODE(n_bits = INIT_BITS); 2771590Srgrimes free_ent = ((block_compress) ? FIRST : 256); 2781590Srgrimes 2791590Srgrimes ent = *bp++; 2801590Srgrimes --count; 2811590Srgrimes 2821590Srgrimes hshift = 0; 2831590Srgrimes for (fcode = (long)hsize; fcode < 65536L; fcode *= 2L) 2841590Srgrimes hshift++; 2851590Srgrimes hshift = 8 - hshift; /* Set hash code range bound. */ 2861590Srgrimes 2871590Srgrimes hsize_reg = hsize; 2881590Srgrimes cl_hash(zs, (count_int)hsize_reg); /* Clear hash table. */ 2891590Srgrimes 2901590Srgrimesmiddle: for (i = 0; count--;) { 2911590Srgrimes c = *bp++; 2921590Srgrimes in_count++; 2931590Srgrimes fcode = (long)(((long)c << maxbits) + ent); 2941590Srgrimes i = ((c << hshift) ^ ent); /* Xor hashing. */ 2951590Srgrimes 2961590Srgrimes if (htabof(i) == fcode) { 2971590Srgrimes ent = codetabof(i); 2981590Srgrimes continue; 2991590Srgrimes } else if ((long)htabof(i) < 0) /* Empty slot. */ 3001590Srgrimes goto nomatch; 3011590Srgrimes disp = hsize_reg - i; /* Secondary hash (after G. Knott). */ 3021590Srgrimes if (i == 0) 3031590Srgrimes disp = 1; 3041590Srgrimesprobe: if ((i -= disp) < 0) 3051590Srgrimes i += hsize_reg; 3061590Srgrimes 3071590Srgrimes if (htabof(i) == fcode) { 3081590Srgrimes ent = codetabof(i); 3091590Srgrimes continue; 3101590Srgrimes } 3111590Srgrimes if ((long)htabof(i) >= 0) 3121590Srgrimes goto probe; 3131590Srgrimesnomatch: if (output(zs, (code_int) ent) == -1) 3141590Srgrimes return (-1); 3151590Srgrimes out_count++; 3161590Srgrimes ent = c; 3171590Srgrimes if (free_ent < maxmaxcode) { 3181590Srgrimes codetabof(i) = free_ent++; /* code -> hashtable */ 3191590Srgrimes htabof(i) = fcode; 3201590Srgrimes } else if ((count_int)in_count >= 3211590Srgrimes checkpoint && block_compress) { 3221590Srgrimes if (cl_block(zs) == -1) 3231590Srgrimes return (-1); 3241590Srgrimes } 3251590Srgrimes } 3261590Srgrimes return (num); 3271590Srgrimes} 3281590Srgrimes 3291590Srgrimesstatic int 3301590Srgrimeszclose(cookie) 3311590Srgrimes void *cookie; 3321590Srgrimes{ 3331590Srgrimes struct s_zstate *zs; 3341590Srgrimes int rval; 3351590Srgrimes 3361590Srgrimes zs = cookie; 3371590Srgrimes if (zmode == 'w') { /* Put out the final code. */ 3381590Srgrimes if (output(zs, (code_int) ent) == -1) { 3391590Srgrimes (void)fclose(fp); 3401590Srgrimes free(zs); 3411590Srgrimes return (-1); 3421590Srgrimes } 3431590Srgrimes out_count++; 3441590Srgrimes if (output(zs, (code_int) - 1) == -1) { 3451590Srgrimes (void)fclose(fp); 3461590Srgrimes free(zs); 3471590Srgrimes return (-1); 3481590Srgrimes } 3491590Srgrimes } 3501590Srgrimes rval = fclose(fp) == EOF ? -1 : 0; 3511590Srgrimes free(zs); 3521590Srgrimes return (rval); 3531590Srgrimes} 3541590Srgrimes 3551590Srgrimes/*- 3561590Srgrimes * Output the given code. 3571590Srgrimes * Inputs: 3581590Srgrimes * code: A n_bits-bit integer. If == -1, then EOF. This assumes 3591590Srgrimes * that n_bits =< (long)wordsize - 1. 3601590Srgrimes * Outputs: 3611590Srgrimes * Outputs code to the file. 3621590Srgrimes * Assumptions: 3631590Srgrimes * Chars are 8 bits long. 3641590Srgrimes * Algorithm: 3651590Srgrimes * Maintain a BITS character long buffer (so that 8 codes will 3661590Srgrimes * fit in it exactly). Use the VAX insv instruction to insert each 3671590Srgrimes * code in turn. When the buffer fills up empty it and start over. 3681590Srgrimes */ 3691590Srgrimes 3701590Srgrimesstatic char_type lmask[9] = 3711590Srgrimes {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00}; 3721590Srgrimesstatic char_type rmask[9] = 3731590Srgrimes {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; 3741590Srgrimes 3751590Srgrimesstatic int 3761590Srgrimesoutput(zs, ocode) 3771590Srgrimes struct s_zstate *zs; 3781590Srgrimes code_int ocode; 3791590Srgrimes{ 38087214Smarkm int r_off; 38187247Smarkm u_int bits; 38287214Smarkm char_type *bp; 3831590Srgrimes 3841590Srgrimes r_off = offset; 3851590Srgrimes bits = n_bits; 3861590Srgrimes bp = buf; 3871590Srgrimes if (ocode >= 0) { 3881590Srgrimes /* Get to the first byte. */ 3891590Srgrimes bp += (r_off >> 3); 3901590Srgrimes r_off &= 7; 3911590Srgrimes /* 3921590Srgrimes * Since ocode is always >= 8 bits, only need to mask the first 3931590Srgrimes * hunk on the left. 3941590Srgrimes */ 39541568Sarchie *bp = (*bp & rmask[r_off]) | ((ocode << r_off) & lmask[r_off]); 3961590Srgrimes bp++; 3971590Srgrimes bits -= (8 - r_off); 3981590Srgrimes ocode >>= 8 - r_off; 3991590Srgrimes /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ 4001590Srgrimes if (bits >= 8) { 4011590Srgrimes *bp++ = ocode; 4021590Srgrimes ocode >>= 8; 4031590Srgrimes bits -= 8; 4041590Srgrimes } 4051590Srgrimes /* Last bits. */ 4061590Srgrimes if (bits) 4071590Srgrimes *bp = ocode; 4081590Srgrimes offset += n_bits; 4091590Srgrimes if (offset == (n_bits << 3)) { 4101590Srgrimes bp = buf; 4111590Srgrimes bits = n_bits; 4121590Srgrimes bytes_out += bits; 4131590Srgrimes if (fwrite(bp, sizeof(char), bits, fp) != bits) 4141590Srgrimes return (-1); 4151590Srgrimes bp += bits; 4161590Srgrimes bits = 0; 4171590Srgrimes offset = 0; 4181590Srgrimes } 4191590Srgrimes /* 4201590Srgrimes * If the next entry is going to be too big for the ocode size, 4211590Srgrimes * then increase it, if possible. 4221590Srgrimes */ 4231590Srgrimes if (free_ent > maxcode || (clear_flg > 0)) { 4241590Srgrimes /* 4251590Srgrimes * Write the whole buffer, because the input side won't 4261590Srgrimes * discover the size increase until after it has read it. 4271590Srgrimes */ 4281590Srgrimes if (offset > 0) { 4291590Srgrimes if (fwrite(buf, 1, n_bits, fp) != n_bits) 4301590Srgrimes return (-1); 4311590Srgrimes bytes_out += n_bits; 4321590Srgrimes } 4331590Srgrimes offset = 0; 4341590Srgrimes 4351590Srgrimes if (clear_flg) { 4361590Srgrimes maxcode = MAXCODE(n_bits = INIT_BITS); 4371590Srgrimes clear_flg = 0; 4381590Srgrimes } else { 4391590Srgrimes n_bits++; 4401590Srgrimes if (n_bits == maxbits) 4411590Srgrimes maxcode = maxmaxcode; 4421590Srgrimes else 4431590Srgrimes maxcode = MAXCODE(n_bits); 4441590Srgrimes } 4451590Srgrimes } 4461590Srgrimes } else { 4471590Srgrimes /* At EOF, write the rest of the buffer. */ 4481590Srgrimes if (offset > 0) { 4491590Srgrimes offset = (offset + 7) / 8; 4501590Srgrimes if (fwrite(buf, 1, offset, fp) != offset) 4511590Srgrimes return (-1); 4521590Srgrimes bytes_out += offset; 4531590Srgrimes } 4541590Srgrimes offset = 0; 4551590Srgrimes } 4561590Srgrimes return (0); 4571590Srgrimes} 4581590Srgrimes 4591590Srgrimes/* 4601590Srgrimes * Decompress read. This routine adapts to the codes in the file building 4611590Srgrimes * the "string" table on-the-fly; requiring no table to be stored in the 4621590Srgrimes * compressed file. The tables used herein are shared with those of the 4631590Srgrimes * compress() routine. See the definitions above. 4641590Srgrimes */ 4651590Srgrimesstatic int 4661590Srgrimeszread(cookie, rbp, num) 4671590Srgrimes void *cookie; 4681590Srgrimes char *rbp; 4691590Srgrimes int num; 4701590Srgrimes{ 47187214Smarkm u_int count; 4721590Srgrimes struct s_zstate *zs; 4731590Srgrimes u_char *bp, header[3]; 4741590Srgrimes 4751590Srgrimes if (num == 0) 4761590Srgrimes return (0); 4771590Srgrimes 4781590Srgrimes zs = cookie; 4791590Srgrimes count = num; 4801590Srgrimes bp = (u_char *)rbp; 4811590Srgrimes switch (state) { 4821590Srgrimes case S_START: 4831590Srgrimes state = S_MIDDLE; 4841590Srgrimes break; 4851590Srgrimes case S_MIDDLE: 4861590Srgrimes goto middle; 4871590Srgrimes case S_EOF: 4881590Srgrimes goto eof; 4891590Srgrimes } 4901590Srgrimes 4911590Srgrimes /* Check the magic number */ 4921590Srgrimes if (fread(header, 4931590Srgrimes sizeof(char), sizeof(header), fp) != sizeof(header) || 4941590Srgrimes memcmp(header, magic_header, sizeof(magic_header)) != 0) { 4951590Srgrimes errno = EFTYPE; 4961590Srgrimes return (-1); 4971590Srgrimes } 4981590Srgrimes maxbits = header[2]; /* Set -b from file. */ 4991590Srgrimes block_compress = maxbits & BLOCK_MASK; 5001590Srgrimes maxbits &= BIT_MASK; 5011590Srgrimes maxmaxcode = 1L << maxbits; 5021590Srgrimes if (maxbits > BITS) { 5031590Srgrimes errno = EFTYPE; 5041590Srgrimes return (-1); 5051590Srgrimes } 5061590Srgrimes /* As above, initialize the first 256 entries in the table. */ 5071590Srgrimes maxcode = MAXCODE(n_bits = INIT_BITS); 5081590Srgrimes for (code = 255; code >= 0; code--) { 5091590Srgrimes tab_prefixof(code) = 0; 5101590Srgrimes tab_suffixof(code) = (char_type) code; 5111590Srgrimes } 5121590Srgrimes free_ent = block_compress ? FIRST : 256; 5131590Srgrimes 5141590Srgrimes finchar = oldcode = getcode(zs); 5151590Srgrimes if (oldcode == -1) /* EOF already? */ 5161590Srgrimes return (0); /* Get out of here */ 5171590Srgrimes 5181590Srgrimes /* First code must be 8 bits = char. */ 5191590Srgrimes *bp++ = (u_char)finchar; 5201590Srgrimes count--; 5211590Srgrimes stackp = de_stack; 5221590Srgrimes 5231590Srgrimes while ((code = getcode(zs)) > -1) { 5241590Srgrimes 5251590Srgrimes if ((code == CLEAR) && block_compress) { 5261590Srgrimes for (code = 255; code >= 0; code--) 5271590Srgrimes tab_prefixof(code) = 0; 5281590Srgrimes clear_flg = 1; 5291590Srgrimes free_ent = FIRST - 1; 5301590Srgrimes if ((code = getcode(zs)) == -1) /* O, untimely death! */ 5311590Srgrimes break; 5321590Srgrimes } 5331590Srgrimes incode = code; 5341590Srgrimes 5351590Srgrimes /* Special case for KwKwK string. */ 5361590Srgrimes if (code >= free_ent) { 5371590Srgrimes *stackp++ = finchar; 5381590Srgrimes code = oldcode; 5391590Srgrimes } 5401590Srgrimes 5411590Srgrimes /* Generate output characters in reverse order. */ 5421590Srgrimes while (code >= 256) { 5431590Srgrimes *stackp++ = tab_suffixof(code); 5441590Srgrimes code = tab_prefixof(code); 5451590Srgrimes } 5461590Srgrimes *stackp++ = finchar = tab_suffixof(code); 5471590Srgrimes 5481590Srgrimes /* And put them out in forward order. */ 5491590Srgrimesmiddle: do { 5501590Srgrimes if (count-- == 0) 5511590Srgrimes return (num); 5521590Srgrimes *bp++ = *--stackp; 5531590Srgrimes } while (stackp > de_stack); 5541590Srgrimes 5551590Srgrimes /* Generate the new entry. */ 5561590Srgrimes if ((code = free_ent) < maxmaxcode) { 5571590Srgrimes tab_prefixof(code) = (u_short) oldcode; 5581590Srgrimes tab_suffixof(code) = finchar; 5591590Srgrimes free_ent = code + 1; 5601590Srgrimes } 5611590Srgrimes 5621590Srgrimes /* Remember previous code. */ 5631590Srgrimes oldcode = incode; 5641590Srgrimes } 5651590Srgrimes state = S_EOF; 5661590Srgrimeseof: return (num - count); 5671590Srgrimes} 5681590Srgrimes 5691590Srgrimes/*- 5701590Srgrimes * Read one code from the standard input. If EOF, return -1. 5711590Srgrimes * Inputs: 5721590Srgrimes * stdin 5731590Srgrimes * Outputs: 5741590Srgrimes * code or -1 is returned. 5751590Srgrimes */ 5761590Srgrimesstatic code_int 5771590Srgrimesgetcode(zs) 5781590Srgrimes struct s_zstate *zs; 5791590Srgrimes{ 58087214Smarkm code_int gcode; 58187214Smarkm int r_off, bits; 58287214Smarkm char_type *bp; 5831590Srgrimes 5841590Srgrimes bp = gbuf; 5851590Srgrimes if (clear_flg > 0 || roffset >= size || free_ent > maxcode) { 5861590Srgrimes /* 5871590Srgrimes * If the next entry will be too big for the current gcode 5881590Srgrimes * size, then we must increase the size. This implies reading 5891590Srgrimes * a new buffer full, too. 5901590Srgrimes */ 5911590Srgrimes if (free_ent > maxcode) { 5921590Srgrimes n_bits++; 5931590Srgrimes if (n_bits == maxbits) /* Won't get any bigger now. */ 5941590Srgrimes maxcode = maxmaxcode; 5951590Srgrimes else 5961590Srgrimes maxcode = MAXCODE(n_bits); 5971590Srgrimes } 5981590Srgrimes if (clear_flg > 0) { 5991590Srgrimes maxcode = MAXCODE(n_bits = INIT_BITS); 6001590Srgrimes clear_flg = 0; 6011590Srgrimes } 6021590Srgrimes size = fread(gbuf, 1, n_bits, fp); 6031590Srgrimes if (size <= 0) /* End of file. */ 6041590Srgrimes return (-1); 6051590Srgrimes roffset = 0; 6061590Srgrimes /* Round size down to integral number of codes. */ 6071590Srgrimes size = (size << 3) - (n_bits - 1); 6081590Srgrimes } 6091590Srgrimes r_off = roffset; 6101590Srgrimes bits = n_bits; 6111590Srgrimes 6121590Srgrimes /* Get to the first byte. */ 6131590Srgrimes bp += (r_off >> 3); 6141590Srgrimes r_off &= 7; 6151590Srgrimes 6161590Srgrimes /* Get first part (low order bits). */ 6171590Srgrimes gcode = (*bp++ >> r_off); 6181590Srgrimes bits -= (8 - r_off); 6191590Srgrimes r_off = 8 - r_off; /* Now, roffset into gcode word. */ 6201590Srgrimes 6211590Srgrimes /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ 6221590Srgrimes if (bits >= 8) { 6231590Srgrimes gcode |= *bp++ << r_off; 6241590Srgrimes r_off += 8; 6251590Srgrimes bits -= 8; 6261590Srgrimes } 6271590Srgrimes 6281590Srgrimes /* High order bits. */ 6291590Srgrimes gcode |= (*bp & rmask[bits]) << r_off; 6301590Srgrimes roffset += n_bits; 6311590Srgrimes 6321590Srgrimes return (gcode); 6331590Srgrimes} 6341590Srgrimes 6351590Srgrimesstatic int 6361590Srgrimescl_block(zs) /* Table clear for block compress. */ 6371590Srgrimes struct s_zstate *zs; 6381590Srgrimes{ 63987214Smarkm long rat; 6401590Srgrimes 6411590Srgrimes checkpoint = in_count + CHECK_GAP; 6421590Srgrimes 6431590Srgrimes if (in_count > 0x007fffff) { /* Shift will overflow. */ 6441590Srgrimes rat = bytes_out >> 8; 6451590Srgrimes if (rat == 0) /* Don't divide by zero. */ 6461590Srgrimes rat = 0x7fffffff; 6471590Srgrimes else 6481590Srgrimes rat = in_count / rat; 6491590Srgrimes } else 6501590Srgrimes rat = (in_count << 8) / bytes_out; /* 8 fractional bits. */ 6511590Srgrimes if (rat > ratio) 6521590Srgrimes ratio = rat; 6531590Srgrimes else { 6541590Srgrimes ratio = 0; 6551590Srgrimes cl_hash(zs, (count_int) hsize); 6561590Srgrimes free_ent = FIRST; 6571590Srgrimes clear_flg = 1; 6581590Srgrimes if (output(zs, (code_int) CLEAR) == -1) 6591590Srgrimes return (-1); 6601590Srgrimes } 6611590Srgrimes return (0); 6621590Srgrimes} 6631590Srgrimes 6641590Srgrimesstatic void 6651590Srgrimescl_hash(zs, cl_hsize) /* Reset code table. */ 6661590Srgrimes struct s_zstate *zs; 66787214Smarkm count_int cl_hsize; 6681590Srgrimes{ 66987214Smarkm count_int *htab_p; 67087214Smarkm long i, m1; 6711590Srgrimes 6721590Srgrimes m1 = -1; 6731590Srgrimes htab_p = htab + cl_hsize; 6741590Srgrimes i = cl_hsize - 16; 6751590Srgrimes do { /* Might use Sys V memset(3) here. */ 6761590Srgrimes *(htab_p - 16) = m1; 6771590Srgrimes *(htab_p - 15) = m1; 6781590Srgrimes *(htab_p - 14) = m1; 6791590Srgrimes *(htab_p - 13) = m1; 6801590Srgrimes *(htab_p - 12) = m1; 6811590Srgrimes *(htab_p - 11) = m1; 6821590Srgrimes *(htab_p - 10) = m1; 6831590Srgrimes *(htab_p - 9) = m1; 6841590Srgrimes *(htab_p - 8) = m1; 6851590Srgrimes *(htab_p - 7) = m1; 6861590Srgrimes *(htab_p - 6) = m1; 6871590Srgrimes *(htab_p - 5) = m1; 6881590Srgrimes *(htab_p - 4) = m1; 6891590Srgrimes *(htab_p - 3) = m1; 6901590Srgrimes *(htab_p - 2) = m1; 6911590Srgrimes *(htab_p - 1) = m1; 6921590Srgrimes htab_p -= 16; 6931590Srgrimes } while ((i -= 16) >= 0); 6941590Srgrimes for (i += 16; i > 0; i--) 6951590Srgrimes *--htab_p = m1; 6961590Srgrimes} 6971590Srgrimes 6981590SrgrimesFILE * 6991590Srgrimeszopen(fname, mode, bits) 7001590Srgrimes const char *fname, *mode; 7011590Srgrimes int bits; 7021590Srgrimes{ 7031590Srgrimes struct s_zstate *zs; 7041590Srgrimes 70541568Sarchie if ((mode[0] != 'r' && mode[0] != 'w') || mode[1] != '\0' || 7061590Srgrimes bits < 0 || bits > BITS) { 7071590Srgrimes errno = EINVAL; 7081590Srgrimes return (NULL); 7091590Srgrimes } 7101590Srgrimes 7111590Srgrimes if ((zs = calloc(1, sizeof(struct s_zstate))) == NULL) 7121590Srgrimes return (NULL); 7131590Srgrimes 7141590Srgrimes maxbits = bits ? bits : BITS; /* User settable max # bits/code. */ 7155342Sats maxmaxcode = 1L << maxbits; /* Should NEVER generate this code. */ 7161590Srgrimes hsize = HSIZE; /* For dynamic table sizing. */ 7171590Srgrimes free_ent = 0; /* First unused entry. */ 7181590Srgrimes block_compress = BLOCK_MASK; 7191590Srgrimes clear_flg = 0; 7201590Srgrimes ratio = 0; 7211590Srgrimes checkpoint = CHECK_GAP; 7221590Srgrimes in_count = 1; /* Length of input. */ 7231590Srgrimes out_count = 0; /* # of codes output (for debugging). */ 7241590Srgrimes state = S_START; 7251590Srgrimes roffset = 0; 7261590Srgrimes size = 0; 7271590Srgrimes 7281590Srgrimes /* 7291590Srgrimes * Layering compress on top of stdio in order to provide buffering, 7301590Srgrimes * and ensure that reads and write work with the data specified. 7311590Srgrimes */ 7321590Srgrimes if ((fp = fopen(fname, mode)) == NULL) { 7331590Srgrimes free(zs); 7341590Srgrimes return (NULL); 7351590Srgrimes } 7361590Srgrimes switch (*mode) { 7371590Srgrimes case 'r': 7381590Srgrimes zmode = 'r'; 7391590Srgrimes return (funopen(zs, zread, NULL, NULL, zclose)); 7401590Srgrimes case 'w': 7411590Srgrimes zmode = 'w'; 7421590Srgrimes return (funopen(zs, NULL, zwrite, NULL, zclose)); 7431590Srgrimes } 7441590Srgrimes /* NOTREACHED */ 74541568Sarchie return (NULL); 7461590Srgrimes} 747