zopen.c revision 87214
11590Srgrimes/*- 21590Srgrimes * Copyright (c) 1985, 1986, 1992, 1993 31590Srgrimes * The Regents of the University of California. All rights reserved. 41590Srgrimes * 51590Srgrimes * This code is derived from software contributed to Berkeley by 61590Srgrimes * Diomidis Spinellis and James A. Woods, derived from original 71590Srgrimes * work by Spencer Thomas and Joseph Orost. 81590Srgrimes * 91590Srgrimes * Redistribution and use in source and binary forms, with or without 101590Srgrimes * modification, are permitted provided that the following conditions 111590Srgrimes * are met: 121590Srgrimes * 1. Redistributions of source code must retain the above copyright 131590Srgrimes * notice, this list of conditions and the following disclaimer. 141590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 151590Srgrimes * notice, this list of conditions and the following disclaimer in the 161590Srgrimes * documentation and/or other materials provided with the distribution. 171590Srgrimes * 3. All advertising materials mentioning features or use of this software 181590Srgrimes * must display the following acknowledgement: 191590Srgrimes * This product includes software developed by the University of 201590Srgrimes * California, Berkeley and its contributors. 211590Srgrimes * 4. Neither the name of the University nor the names of its contributors 221590Srgrimes * may be used to endorse or promote products derived from this software 231590Srgrimes * without specific prior written permission. 241590Srgrimes * 251590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 261590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 271590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 281590Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 291590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 301590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 311590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 321590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 331590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 341590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 351590Srgrimes * SUCH DAMAGE. 3687214Smarkm * 3787214Smarkm * $FreeBSD: head/usr.bin/compress/zopen.c 87214 2001-12-02 13:31:22Z markm $ 381590Srgrimes */ 391590Srgrimes 401590Srgrimes#if defined(LIBC_SCCS) && !defined(lint) 411590Srgrimesstatic char sccsid[] = "@(#)zopen.c 8.1 (Berkeley) 6/27/93"; 421590Srgrimes#endif /* LIBC_SCCS and not lint */ 431590Srgrimes 441590Srgrimes/*- 451590Srgrimes * fcompress.c - File compression ala IEEE Computer, June 1984. 461590Srgrimes * 471590Srgrimes * Compress authors: 481590Srgrimes * Spencer W. Thomas (decvax!utah-cs!thomas) 491590Srgrimes * Jim McKie (decvax!mcvax!jim) 501590Srgrimes * Steve Davies (decvax!vax135!petsd!peora!srd) 511590Srgrimes * Ken Turkowski (decvax!decwrl!turtlevax!ken) 521590Srgrimes * James A. Woods (decvax!ihnp4!ames!jaw) 531590Srgrimes * Joe Orost (decvax!vax135!petsd!joe) 541590Srgrimes * 551590Srgrimes * Cleaned up and converted to library returning I/O streams by 561590Srgrimes * Diomidis Spinellis <dds@doc.ic.ac.uk>. 571590Srgrimes * 581590Srgrimes * zopen(filename, mode, bits) 591590Srgrimes * Returns a FILE * that can be used for read or write. The modes 601590Srgrimes * supported are only "r" and "w". Seeking is not allowed. On 611590Srgrimes * reading the file is decompressed, on writing it is compressed. 621590Srgrimes * The output is compatible with compress(1) with 16 bit tables. 631590Srgrimes * Any file produced by compress(1) can be read. 641590Srgrimes */ 651590Srgrimes 661590Srgrimes#include <sys/param.h> 671590Srgrimes#include <sys/stat.h> 681590Srgrimes 691590Srgrimes#include <ctype.h> 701590Srgrimes#include <errno.h> 711590Srgrimes#include <signal.h> 721590Srgrimes#include <stdio.h> 731590Srgrimes#include <stdlib.h> 741590Srgrimes#include <string.h> 751590Srgrimes#include <unistd.h> 7617717Swosch#include "zopen.h" 771590Srgrimes 781590Srgrimes#define BITS 16 /* Default bits. */ 791590Srgrimes#define HSIZE 69001 /* 95% occupancy */ 801590Srgrimes 811590Srgrimes/* A code_int must be able to hold 2**BITS values of type int, and also -1. */ 821590Srgrimestypedef long code_int; 831590Srgrimestypedef long count_int; 841590Srgrimes 851590Srgrimestypedef u_char char_type; 861590Srgrimesstatic char_type magic_header[] = 871590Srgrimes {'\037', '\235'}; /* 1F 9D */ 881590Srgrimes 891590Srgrimes#define BIT_MASK 0x1f /* Defines for third byte of header. */ 901590Srgrimes#define BLOCK_MASK 0x80 911590Srgrimes 921590Srgrimes/* 931590Srgrimes * Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is 941590Srgrimes * a fourth header byte (for expansion). 951590Srgrimes */ 961590Srgrimes#define INIT_BITS 9 /* Initial number of bits/code. */ 971590Srgrimes 981590Srgrimes#define MAXCODE(n_bits) ((1 << (n_bits)) - 1) 991590Srgrimes 1001590Srgrimesstruct s_zstate { 1011590Srgrimes FILE *zs_fp; /* File stream for I/O */ 1021590Srgrimes char zs_mode; /* r or w */ 1031590Srgrimes enum { 1041590Srgrimes S_START, S_MIDDLE, S_EOF 1051590Srgrimes } zs_state; /* State of computation */ 10687214Smarkm size_t zs_n_bits; /* Number of bits/code. */ 10787214Smarkm size_t zs_maxbits; /* User settable max # bits/code. */ 1081590Srgrimes code_int zs_maxcode; /* Maximum code, given n_bits. */ 1091590Srgrimes code_int zs_maxmaxcode; /* Should NEVER generate this code. */ 1101590Srgrimes count_int zs_htab [HSIZE]; 1111590Srgrimes u_short zs_codetab [HSIZE]; 1121590Srgrimes code_int zs_hsize; /* For dynamic table sizing. */ 1131590Srgrimes code_int zs_free_ent; /* First unused entry. */ 1141590Srgrimes /* 1151590Srgrimes * Block compression parameters -- after all codes are used up, 1161590Srgrimes * and compression rate changes, start over. 1171590Srgrimes */ 1181590Srgrimes int zs_block_compress; 1191590Srgrimes int zs_clear_flg; 1201590Srgrimes long zs_ratio; 1211590Srgrimes count_int zs_checkpoint; 12287214Smarkm size_t zs_offset; 1231590Srgrimes long zs_in_count; /* Length of input. */ 1241590Srgrimes long zs_bytes_out; /* Length of compressed output. */ 1251590Srgrimes long zs_out_count; /* # of codes output (for debugging). */ 1261590Srgrimes char_type zs_buf[BITS]; 1271590Srgrimes union { 1281590Srgrimes struct { 1291590Srgrimes long zs_fcode; 1301590Srgrimes code_int zs_ent; 1311590Srgrimes code_int zs_hsize_reg; 1321590Srgrimes int zs_hshift; 1331590Srgrimes } w; /* Write paramenters */ 1341590Srgrimes struct { 1351590Srgrimes char_type *zs_stackp; 1361590Srgrimes int zs_finchar; 1371590Srgrimes code_int zs_code, zs_oldcode, zs_incode; 1381590Srgrimes int zs_roffset, zs_size; 1391590Srgrimes char_type zs_gbuf[BITS]; 1401590Srgrimes } r; /* Read parameters */ 1411590Srgrimes } u; 1421590Srgrimes}; 1431590Srgrimes 1441590Srgrimes/* Definitions to retain old variable names */ 1451590Srgrimes#define fp zs->zs_fp 1461590Srgrimes#define zmode zs->zs_mode 1471590Srgrimes#define state zs->zs_state 1481590Srgrimes#define n_bits zs->zs_n_bits 1491590Srgrimes#define maxbits zs->zs_maxbits 1501590Srgrimes#define maxcode zs->zs_maxcode 1511590Srgrimes#define maxmaxcode zs->zs_maxmaxcode 1521590Srgrimes#define htab zs->zs_htab 1531590Srgrimes#define codetab zs->zs_codetab 1541590Srgrimes#define hsize zs->zs_hsize 1551590Srgrimes#define free_ent zs->zs_free_ent 1561590Srgrimes#define block_compress zs->zs_block_compress 1571590Srgrimes#define clear_flg zs->zs_clear_flg 1581590Srgrimes#define ratio zs->zs_ratio 1591590Srgrimes#define checkpoint zs->zs_checkpoint 1601590Srgrimes#define offset zs->zs_offset 1611590Srgrimes#define in_count zs->zs_in_count 1621590Srgrimes#define bytes_out zs->zs_bytes_out 1631590Srgrimes#define out_count zs->zs_out_count 1641590Srgrimes#define buf zs->zs_buf 1651590Srgrimes#define fcode zs->u.w.zs_fcode 1661590Srgrimes#define hsize_reg zs->u.w.zs_hsize_reg 1671590Srgrimes#define ent zs->u.w.zs_ent 1681590Srgrimes#define hshift zs->u.w.zs_hshift 1691590Srgrimes#define stackp zs->u.r.zs_stackp 1701590Srgrimes#define finchar zs->u.r.zs_finchar 1711590Srgrimes#define code zs->u.r.zs_code 1721590Srgrimes#define oldcode zs->u.r.zs_oldcode 1731590Srgrimes#define incode zs->u.r.zs_incode 1741590Srgrimes#define roffset zs->u.r.zs_roffset 1751590Srgrimes#define size zs->u.r.zs_size 1761590Srgrimes#define gbuf zs->u.r.zs_gbuf 1771590Srgrimes 1781590Srgrimes/* 1791590Srgrimes * To save much memory, we overlay the table used by compress() with those 1801590Srgrimes * used by decompress(). The tab_prefix table is the same size and type as 1811590Srgrimes * the codetab. The tab_suffix table needs 2**BITS characters. We get this 1821590Srgrimes * from the beginning of htab. The output stack uses the rest of htab, and 1831590Srgrimes * contains characters. There is plenty of room for any possible stack 1841590Srgrimes * (stack used to be 8000 characters). 1851590Srgrimes */ 1861590Srgrimes 1871590Srgrimes#define htabof(i) htab[i] 1881590Srgrimes#define codetabof(i) codetab[i] 1891590Srgrimes 1901590Srgrimes#define tab_prefixof(i) codetabof(i) 1911590Srgrimes#define tab_suffixof(i) ((char_type *)(htab))[i] 1921590Srgrimes#define de_stack ((char_type *)&tab_suffixof(1 << BITS)) 1931590Srgrimes 1941590Srgrimes#define CHECK_GAP 10000 /* Ratio check interval. */ 1951590Srgrimes 1961590Srgrimes/* 1971590Srgrimes * the next two codes should not be changed lightly, as they must not 1981590Srgrimes * lie within the contiguous general code space. 1991590Srgrimes */ 2001590Srgrimes#define FIRST 257 /* First free entry. */ 2011590Srgrimes#define CLEAR 256 /* Table clear output code. */ 2021590Srgrimes 2031590Srgrimesstatic int cl_block __P((struct s_zstate *)); 2041590Srgrimesstatic void cl_hash __P((struct s_zstate *, count_int)); 2051590Srgrimesstatic code_int getcode __P((struct s_zstate *)); 2061590Srgrimesstatic int output __P((struct s_zstate *, code_int)); 2071590Srgrimesstatic int zclose __P((void *)); 2081590Srgrimesstatic int zread __P((void *, char *, int)); 2091590Srgrimesstatic int zwrite __P((void *, const char *, int)); 2101590Srgrimes 2111590Srgrimes/*- 2121590Srgrimes * Algorithm from "A Technique for High Performance Data Compression", 2131590Srgrimes * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19. 2141590Srgrimes * 2151590Srgrimes * Algorithm: 2161590Srgrimes * Modified Lempel-Ziv method (LZW). Basically finds common 2171590Srgrimes * substrings and replaces them with a variable size code. This is 2181590Srgrimes * deterministic, and can be done on the fly. Thus, the decompression 2191590Srgrimes * procedure needs no input table, but tracks the way the table was built. 2201590Srgrimes */ 2211590Srgrimes 2221590Srgrimes/*- 2231590Srgrimes * compress write 2241590Srgrimes * 2251590Srgrimes * Algorithm: use open addressing double hashing (no chaining) on the 2261590Srgrimes * prefix code / next character combination. We do a variant of Knuth's 2271590Srgrimes * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime 2281590Srgrimes * secondary probe. Here, the modular division first probe is gives way 2291590Srgrimes * to a faster exclusive-or manipulation. Also do block compression with 2301590Srgrimes * an adaptive reset, whereby the code table is cleared when the compression 2311590Srgrimes * ratio decreases, but after the table fills. The variable-length output 2321590Srgrimes * codes are re-sized at this point, and a special CLEAR code is generated 2331590Srgrimes * for the decompressor. Late addition: construct the table according to 2341590Srgrimes * file size for noticeable speed improvement on small files. Please direct 2351590Srgrimes * questions about this implementation to ames!jaw. 2361590Srgrimes */ 2371590Srgrimesstatic int 2381590Srgrimeszwrite(cookie, wbp, num) 2391590Srgrimes void *cookie; 2401590Srgrimes const char *wbp; 2411590Srgrimes int num; 2421590Srgrimes{ 24387214Smarkm code_int i; 24487214Smarkm int c, disp; 2451590Srgrimes struct s_zstate *zs; 2461590Srgrimes const u_char *bp; 2471590Srgrimes u_char tmp; 2481590Srgrimes int count; 2491590Srgrimes 2501590Srgrimes if (num == 0) 2511590Srgrimes return (0); 2521590Srgrimes 2531590Srgrimes zs = cookie; 2541590Srgrimes count = num; 25587214Smarkm bp = wbp; 2561590Srgrimes if (state == S_MIDDLE) 2571590Srgrimes goto middle; 2581590Srgrimes state = S_MIDDLE; 2591590Srgrimes 2605342Sats maxmaxcode = 1L << maxbits; 2611590Srgrimes if (fwrite(magic_header, 2621590Srgrimes sizeof(char), sizeof(magic_header), fp) != sizeof(magic_header)) 2631590Srgrimes return (-1); 2645302Sjkh tmp = (u_char)((maxbits) | block_compress); 2651590Srgrimes if (fwrite(&tmp, sizeof(char), sizeof(tmp), fp) != sizeof(tmp)) 2661590Srgrimes return (-1); 2671590Srgrimes 2681590Srgrimes offset = 0; 2691590Srgrimes bytes_out = 3; /* Includes 3-byte header mojo. */ 2701590Srgrimes out_count = 0; 2711590Srgrimes clear_flg = 0; 2721590Srgrimes ratio = 0; 2731590Srgrimes in_count = 1; 2741590Srgrimes checkpoint = CHECK_GAP; 2751590Srgrimes maxcode = MAXCODE(n_bits = INIT_BITS); 2761590Srgrimes free_ent = ((block_compress) ? FIRST : 256); 2771590Srgrimes 2781590Srgrimes ent = *bp++; 2791590Srgrimes --count; 2801590Srgrimes 2811590Srgrimes hshift = 0; 2821590Srgrimes for (fcode = (long)hsize; fcode < 65536L; fcode *= 2L) 2831590Srgrimes hshift++; 2841590Srgrimes hshift = 8 - hshift; /* Set hash code range bound. */ 2851590Srgrimes 2861590Srgrimes hsize_reg = hsize; 2871590Srgrimes cl_hash(zs, (count_int)hsize_reg); /* Clear hash table. */ 2881590Srgrimes 2891590Srgrimesmiddle: for (i = 0; count--;) { 2901590Srgrimes c = *bp++; 2911590Srgrimes in_count++; 2921590Srgrimes fcode = (long)(((long)c << maxbits) + ent); 2931590Srgrimes i = ((c << hshift) ^ ent); /* Xor hashing. */ 2941590Srgrimes 2951590Srgrimes if (htabof(i) == fcode) { 2961590Srgrimes ent = codetabof(i); 2971590Srgrimes continue; 2981590Srgrimes } else if ((long)htabof(i) < 0) /* Empty slot. */ 2991590Srgrimes goto nomatch; 3001590Srgrimes disp = hsize_reg - i; /* Secondary hash (after G. Knott). */ 3011590Srgrimes if (i == 0) 3021590Srgrimes disp = 1; 3031590Srgrimesprobe: if ((i -= disp) < 0) 3041590Srgrimes i += hsize_reg; 3051590Srgrimes 3061590Srgrimes if (htabof(i) == fcode) { 3071590Srgrimes ent = codetabof(i); 3081590Srgrimes continue; 3091590Srgrimes } 3101590Srgrimes if ((long)htabof(i) >= 0) 3111590Srgrimes goto probe; 3121590Srgrimesnomatch: if (output(zs, (code_int) ent) == -1) 3131590Srgrimes return (-1); 3141590Srgrimes out_count++; 3151590Srgrimes ent = c; 3161590Srgrimes if (free_ent < maxmaxcode) { 3171590Srgrimes codetabof(i) = free_ent++; /* code -> hashtable */ 3181590Srgrimes htabof(i) = fcode; 3191590Srgrimes } else if ((count_int)in_count >= 3201590Srgrimes checkpoint && block_compress) { 3211590Srgrimes if (cl_block(zs) == -1) 3221590Srgrimes return (-1); 3231590Srgrimes } 3241590Srgrimes } 3251590Srgrimes return (num); 3261590Srgrimes} 3271590Srgrimes 3281590Srgrimesstatic int 3291590Srgrimeszclose(cookie) 3301590Srgrimes void *cookie; 3311590Srgrimes{ 3321590Srgrimes struct s_zstate *zs; 3331590Srgrimes int rval; 3341590Srgrimes 3351590Srgrimes zs = cookie; 3361590Srgrimes if (zmode == 'w') { /* Put out the final code. */ 3371590Srgrimes if (output(zs, (code_int) ent) == -1) { 3381590Srgrimes (void)fclose(fp); 3391590Srgrimes free(zs); 3401590Srgrimes return (-1); 3411590Srgrimes } 3421590Srgrimes out_count++; 3431590Srgrimes if (output(zs, (code_int) - 1) == -1) { 3441590Srgrimes (void)fclose(fp); 3451590Srgrimes free(zs); 3461590Srgrimes return (-1); 3471590Srgrimes } 3481590Srgrimes } 3491590Srgrimes rval = fclose(fp) == EOF ? -1 : 0; 3501590Srgrimes free(zs); 3511590Srgrimes return (rval); 3521590Srgrimes} 3531590Srgrimes 3541590Srgrimes/*- 3551590Srgrimes * Output the given code. 3561590Srgrimes * Inputs: 3571590Srgrimes * code: A n_bits-bit integer. If == -1, then EOF. This assumes 3581590Srgrimes * that n_bits =< (long)wordsize - 1. 3591590Srgrimes * Outputs: 3601590Srgrimes * Outputs code to the file. 3611590Srgrimes * Assumptions: 3621590Srgrimes * Chars are 8 bits long. 3631590Srgrimes * Algorithm: 3641590Srgrimes * Maintain a BITS character long buffer (so that 8 codes will 3651590Srgrimes * fit in it exactly). Use the VAX insv instruction to insert each 3661590Srgrimes * code in turn. When the buffer fills up empty it and start over. 3671590Srgrimes */ 3681590Srgrimes 3691590Srgrimesstatic char_type lmask[9] = 3701590Srgrimes {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00}; 3711590Srgrimesstatic char_type rmask[9] = 3721590Srgrimes {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; 3731590Srgrimes 3741590Srgrimesstatic int 3751590Srgrimesoutput(zs, ocode) 3761590Srgrimes struct s_zstate *zs; 3771590Srgrimes code_int ocode; 3781590Srgrimes{ 37987214Smarkm int r_off; 38087214Smarkm size_t bits; 38187214Smarkm char_type *bp; 3821590Srgrimes 3831590Srgrimes r_off = offset; 3841590Srgrimes bits = n_bits; 3851590Srgrimes bp = buf; 3861590Srgrimes if (ocode >= 0) { 3871590Srgrimes /* Get to the first byte. */ 3881590Srgrimes bp += (r_off >> 3); 3891590Srgrimes r_off &= 7; 3901590Srgrimes /* 3911590Srgrimes * Since ocode is always >= 8 bits, only need to mask the first 3921590Srgrimes * hunk on the left. 3931590Srgrimes */ 39441568Sarchie *bp = (*bp & rmask[r_off]) | ((ocode << r_off) & lmask[r_off]); 3951590Srgrimes bp++; 3961590Srgrimes bits -= (8 - r_off); 3971590Srgrimes ocode >>= 8 - r_off; 3981590Srgrimes /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ 3991590Srgrimes if (bits >= 8) { 4001590Srgrimes *bp++ = ocode; 4011590Srgrimes ocode >>= 8; 4021590Srgrimes bits -= 8; 4031590Srgrimes } 4041590Srgrimes /* Last bits. */ 4051590Srgrimes if (bits) 4061590Srgrimes *bp = ocode; 4071590Srgrimes offset += n_bits; 4081590Srgrimes if (offset == (n_bits << 3)) { 4091590Srgrimes bp = buf; 4101590Srgrimes bits = n_bits; 4111590Srgrimes bytes_out += bits; 4121590Srgrimes if (fwrite(bp, sizeof(char), bits, fp) != bits) 4131590Srgrimes return (-1); 4141590Srgrimes bp += bits; 4151590Srgrimes bits = 0; 4161590Srgrimes offset = 0; 4171590Srgrimes } 4181590Srgrimes /* 4191590Srgrimes * If the next entry is going to be too big for the ocode size, 4201590Srgrimes * then increase it, if possible. 4211590Srgrimes */ 4221590Srgrimes if (free_ent > maxcode || (clear_flg > 0)) { 4231590Srgrimes /* 4241590Srgrimes * Write the whole buffer, because the input side won't 4251590Srgrimes * discover the size increase until after it has read it. 4261590Srgrimes */ 4271590Srgrimes if (offset > 0) { 4281590Srgrimes if (fwrite(buf, 1, n_bits, fp) != n_bits) 4291590Srgrimes return (-1); 4301590Srgrimes bytes_out += n_bits; 4311590Srgrimes } 4321590Srgrimes offset = 0; 4331590Srgrimes 4341590Srgrimes if (clear_flg) { 4351590Srgrimes maxcode = MAXCODE(n_bits = INIT_BITS); 4361590Srgrimes clear_flg = 0; 4371590Srgrimes } else { 4381590Srgrimes n_bits++; 4391590Srgrimes if (n_bits == maxbits) 4401590Srgrimes maxcode = maxmaxcode; 4411590Srgrimes else 4421590Srgrimes maxcode = MAXCODE(n_bits); 4431590Srgrimes } 4441590Srgrimes } 4451590Srgrimes } else { 4461590Srgrimes /* At EOF, write the rest of the buffer. */ 4471590Srgrimes if (offset > 0) { 4481590Srgrimes offset = (offset + 7) / 8; 4491590Srgrimes if (fwrite(buf, 1, offset, fp) != offset) 4501590Srgrimes return (-1); 4511590Srgrimes bytes_out += offset; 4521590Srgrimes } 4531590Srgrimes offset = 0; 4541590Srgrimes } 4551590Srgrimes return (0); 4561590Srgrimes} 4571590Srgrimes 4581590Srgrimes/* 4591590Srgrimes * Decompress read. This routine adapts to the codes in the file building 4601590Srgrimes * the "string" table on-the-fly; requiring no table to be stored in the 4611590Srgrimes * compressed file. The tables used herein are shared with those of the 4621590Srgrimes * compress() routine. See the definitions above. 4631590Srgrimes */ 4641590Srgrimesstatic int 4651590Srgrimeszread(cookie, rbp, num) 4661590Srgrimes void *cookie; 4671590Srgrimes char *rbp; 4681590Srgrimes int num; 4691590Srgrimes{ 47087214Smarkm u_int count; 4711590Srgrimes struct s_zstate *zs; 4721590Srgrimes u_char *bp, header[3]; 4731590Srgrimes 4741590Srgrimes if (num == 0) 4751590Srgrimes return (0); 4761590Srgrimes 4771590Srgrimes zs = cookie; 4781590Srgrimes count = num; 4791590Srgrimes bp = (u_char *)rbp; 4801590Srgrimes switch (state) { 4811590Srgrimes case S_START: 4821590Srgrimes state = S_MIDDLE; 4831590Srgrimes break; 4841590Srgrimes case S_MIDDLE: 4851590Srgrimes goto middle; 4861590Srgrimes case S_EOF: 4871590Srgrimes goto eof; 4881590Srgrimes } 4891590Srgrimes 4901590Srgrimes /* Check the magic number */ 4911590Srgrimes if (fread(header, 4921590Srgrimes sizeof(char), sizeof(header), fp) != sizeof(header) || 4931590Srgrimes memcmp(header, magic_header, sizeof(magic_header)) != 0) { 4941590Srgrimes errno = EFTYPE; 4951590Srgrimes return (-1); 4961590Srgrimes } 4971590Srgrimes maxbits = header[2]; /* Set -b from file. */ 4981590Srgrimes block_compress = maxbits & BLOCK_MASK; 4991590Srgrimes maxbits &= BIT_MASK; 5001590Srgrimes maxmaxcode = 1L << maxbits; 5011590Srgrimes if (maxbits > BITS) { 5021590Srgrimes errno = EFTYPE; 5031590Srgrimes return (-1); 5041590Srgrimes } 5051590Srgrimes /* As above, initialize the first 256 entries in the table. */ 5061590Srgrimes maxcode = MAXCODE(n_bits = INIT_BITS); 5071590Srgrimes for (code = 255; code >= 0; code--) { 5081590Srgrimes tab_prefixof(code) = 0; 5091590Srgrimes tab_suffixof(code) = (char_type) code; 5101590Srgrimes } 5111590Srgrimes free_ent = block_compress ? FIRST : 256; 5121590Srgrimes 5131590Srgrimes finchar = oldcode = getcode(zs); 5141590Srgrimes if (oldcode == -1) /* EOF already? */ 5151590Srgrimes return (0); /* Get out of here */ 5161590Srgrimes 5171590Srgrimes /* First code must be 8 bits = char. */ 5181590Srgrimes *bp++ = (u_char)finchar; 5191590Srgrimes count--; 5201590Srgrimes stackp = de_stack; 5211590Srgrimes 5221590Srgrimes while ((code = getcode(zs)) > -1) { 5231590Srgrimes 5241590Srgrimes if ((code == CLEAR) && block_compress) { 5251590Srgrimes for (code = 255; code >= 0; code--) 5261590Srgrimes tab_prefixof(code) = 0; 5271590Srgrimes clear_flg = 1; 5281590Srgrimes free_ent = FIRST - 1; 5291590Srgrimes if ((code = getcode(zs)) == -1) /* O, untimely death! */ 5301590Srgrimes break; 5311590Srgrimes } 5321590Srgrimes incode = code; 5331590Srgrimes 5341590Srgrimes /* Special case for KwKwK string. */ 5351590Srgrimes if (code >= free_ent) { 5361590Srgrimes *stackp++ = finchar; 5371590Srgrimes code = oldcode; 5381590Srgrimes } 5391590Srgrimes 5401590Srgrimes /* Generate output characters in reverse order. */ 5411590Srgrimes while (code >= 256) { 5421590Srgrimes *stackp++ = tab_suffixof(code); 5431590Srgrimes code = tab_prefixof(code); 5441590Srgrimes } 5451590Srgrimes *stackp++ = finchar = tab_suffixof(code); 5461590Srgrimes 5471590Srgrimes /* And put them out in forward order. */ 5481590Srgrimesmiddle: do { 5491590Srgrimes if (count-- == 0) 5501590Srgrimes return (num); 5511590Srgrimes *bp++ = *--stackp; 5521590Srgrimes } while (stackp > de_stack); 5531590Srgrimes 5541590Srgrimes /* Generate the new entry. */ 5551590Srgrimes if ((code = free_ent) < maxmaxcode) { 5561590Srgrimes tab_prefixof(code) = (u_short) oldcode; 5571590Srgrimes tab_suffixof(code) = finchar; 5581590Srgrimes free_ent = code + 1; 5591590Srgrimes } 5601590Srgrimes 5611590Srgrimes /* Remember previous code. */ 5621590Srgrimes oldcode = incode; 5631590Srgrimes } 5641590Srgrimes state = S_EOF; 5651590Srgrimeseof: return (num - count); 5661590Srgrimes} 5671590Srgrimes 5681590Srgrimes/*- 5691590Srgrimes * Read one code from the standard input. If EOF, return -1. 5701590Srgrimes * Inputs: 5711590Srgrimes * stdin 5721590Srgrimes * Outputs: 5731590Srgrimes * code or -1 is returned. 5741590Srgrimes */ 5751590Srgrimesstatic code_int 5761590Srgrimesgetcode(zs) 5771590Srgrimes struct s_zstate *zs; 5781590Srgrimes{ 57987214Smarkm code_int gcode; 58087214Smarkm int r_off, bits; 58187214Smarkm char_type *bp; 5821590Srgrimes 5831590Srgrimes bp = gbuf; 5841590Srgrimes if (clear_flg > 0 || roffset >= size || free_ent > maxcode) { 5851590Srgrimes /* 5861590Srgrimes * If the next entry will be too big for the current gcode 5871590Srgrimes * size, then we must increase the size. This implies reading 5881590Srgrimes * a new buffer full, too. 5891590Srgrimes */ 5901590Srgrimes if (free_ent > maxcode) { 5911590Srgrimes n_bits++; 5921590Srgrimes if (n_bits == maxbits) /* Won't get any bigger now. */ 5931590Srgrimes maxcode = maxmaxcode; 5941590Srgrimes else 5951590Srgrimes maxcode = MAXCODE(n_bits); 5961590Srgrimes } 5971590Srgrimes if (clear_flg > 0) { 5981590Srgrimes maxcode = MAXCODE(n_bits = INIT_BITS); 5991590Srgrimes clear_flg = 0; 6001590Srgrimes } 6011590Srgrimes size = fread(gbuf, 1, n_bits, fp); 6021590Srgrimes if (size <= 0) /* End of file. */ 6031590Srgrimes return (-1); 6041590Srgrimes roffset = 0; 6051590Srgrimes /* Round size down to integral number of codes. */ 6061590Srgrimes size = (size << 3) - (n_bits - 1); 6071590Srgrimes } 6081590Srgrimes r_off = roffset; 6091590Srgrimes bits = n_bits; 6101590Srgrimes 6111590Srgrimes /* Get to the first byte. */ 6121590Srgrimes bp += (r_off >> 3); 6131590Srgrimes r_off &= 7; 6141590Srgrimes 6151590Srgrimes /* Get first part (low order bits). */ 6161590Srgrimes gcode = (*bp++ >> r_off); 6171590Srgrimes bits -= (8 - r_off); 6181590Srgrimes r_off = 8 - r_off; /* Now, roffset into gcode word. */ 6191590Srgrimes 6201590Srgrimes /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ 6211590Srgrimes if (bits >= 8) { 6221590Srgrimes gcode |= *bp++ << r_off; 6231590Srgrimes r_off += 8; 6241590Srgrimes bits -= 8; 6251590Srgrimes } 6261590Srgrimes 6271590Srgrimes /* High order bits. */ 6281590Srgrimes gcode |= (*bp & rmask[bits]) << r_off; 6291590Srgrimes roffset += n_bits; 6301590Srgrimes 6311590Srgrimes return (gcode); 6321590Srgrimes} 6331590Srgrimes 6341590Srgrimesstatic int 6351590Srgrimescl_block(zs) /* Table clear for block compress. */ 6361590Srgrimes struct s_zstate *zs; 6371590Srgrimes{ 63887214Smarkm long rat; 6391590Srgrimes 6401590Srgrimes checkpoint = in_count + CHECK_GAP; 6411590Srgrimes 6421590Srgrimes if (in_count > 0x007fffff) { /* Shift will overflow. */ 6431590Srgrimes rat = bytes_out >> 8; 6441590Srgrimes if (rat == 0) /* Don't divide by zero. */ 6451590Srgrimes rat = 0x7fffffff; 6461590Srgrimes else 6471590Srgrimes rat = in_count / rat; 6481590Srgrimes } else 6491590Srgrimes rat = (in_count << 8) / bytes_out; /* 8 fractional bits. */ 6501590Srgrimes if (rat > ratio) 6511590Srgrimes ratio = rat; 6521590Srgrimes else { 6531590Srgrimes ratio = 0; 6541590Srgrimes cl_hash(zs, (count_int) hsize); 6551590Srgrimes free_ent = FIRST; 6561590Srgrimes clear_flg = 1; 6571590Srgrimes if (output(zs, (code_int) CLEAR) == -1) 6581590Srgrimes return (-1); 6591590Srgrimes } 6601590Srgrimes return (0); 6611590Srgrimes} 6621590Srgrimes 6631590Srgrimesstatic void 6641590Srgrimescl_hash(zs, cl_hsize) /* Reset code table. */ 6651590Srgrimes struct s_zstate *zs; 66687214Smarkm count_int cl_hsize; 6671590Srgrimes{ 66887214Smarkm count_int *htab_p; 66987214Smarkm long i, m1; 6701590Srgrimes 6711590Srgrimes m1 = -1; 6721590Srgrimes htab_p = htab + cl_hsize; 6731590Srgrimes i = cl_hsize - 16; 6741590Srgrimes do { /* Might use Sys V memset(3) here. */ 6751590Srgrimes *(htab_p - 16) = m1; 6761590Srgrimes *(htab_p - 15) = m1; 6771590Srgrimes *(htab_p - 14) = m1; 6781590Srgrimes *(htab_p - 13) = m1; 6791590Srgrimes *(htab_p - 12) = m1; 6801590Srgrimes *(htab_p - 11) = m1; 6811590Srgrimes *(htab_p - 10) = m1; 6821590Srgrimes *(htab_p - 9) = m1; 6831590Srgrimes *(htab_p - 8) = m1; 6841590Srgrimes *(htab_p - 7) = m1; 6851590Srgrimes *(htab_p - 6) = m1; 6861590Srgrimes *(htab_p - 5) = m1; 6871590Srgrimes *(htab_p - 4) = m1; 6881590Srgrimes *(htab_p - 3) = m1; 6891590Srgrimes *(htab_p - 2) = m1; 6901590Srgrimes *(htab_p - 1) = m1; 6911590Srgrimes htab_p -= 16; 6921590Srgrimes } while ((i -= 16) >= 0); 6931590Srgrimes for (i += 16; i > 0; i--) 6941590Srgrimes *--htab_p = m1; 6951590Srgrimes} 6961590Srgrimes 6971590SrgrimesFILE * 6981590Srgrimeszopen(fname, mode, bits) 6991590Srgrimes const char *fname, *mode; 7001590Srgrimes int bits; 7011590Srgrimes{ 7021590Srgrimes struct s_zstate *zs; 7031590Srgrimes 70441568Sarchie if ((mode[0] != 'r' && mode[0] != 'w') || mode[1] != '\0' || 7051590Srgrimes bits < 0 || bits > BITS) { 7061590Srgrimes errno = EINVAL; 7071590Srgrimes return (NULL); 7081590Srgrimes } 7091590Srgrimes 7101590Srgrimes if ((zs = calloc(1, sizeof(struct s_zstate))) == NULL) 7111590Srgrimes return (NULL); 7121590Srgrimes 7131590Srgrimes maxbits = bits ? bits : BITS; /* User settable max # bits/code. */ 7145342Sats maxmaxcode = 1L << maxbits; /* Should NEVER generate this code. */ 7151590Srgrimes hsize = HSIZE; /* For dynamic table sizing. */ 7161590Srgrimes free_ent = 0; /* First unused entry. */ 7171590Srgrimes block_compress = BLOCK_MASK; 7181590Srgrimes clear_flg = 0; 7191590Srgrimes ratio = 0; 7201590Srgrimes checkpoint = CHECK_GAP; 7211590Srgrimes in_count = 1; /* Length of input. */ 7221590Srgrimes out_count = 0; /* # of codes output (for debugging). */ 7231590Srgrimes state = S_START; 7241590Srgrimes roffset = 0; 7251590Srgrimes size = 0; 7261590Srgrimes 7271590Srgrimes /* 7281590Srgrimes * Layering compress on top of stdio in order to provide buffering, 7291590Srgrimes * and ensure that reads and write work with the data specified. 7301590Srgrimes */ 7311590Srgrimes if ((fp = fopen(fname, mode)) == NULL) { 7321590Srgrimes free(zs); 7331590Srgrimes return (NULL); 7341590Srgrimes } 7351590Srgrimes switch (*mode) { 7361590Srgrimes case 'r': 7371590Srgrimes zmode = 'r'; 7381590Srgrimes return (funopen(zs, zread, NULL, NULL, zclose)); 7391590Srgrimes case 'w': 7401590Srgrimes zmode = 'w'; 7411590Srgrimes return (funopen(zs, NULL, zwrite, NULL, zclose)); 7421590Srgrimes } 7431590Srgrimes /* NOTREACHED */ 74441568Sarchie return (NULL); 7451590Srgrimes} 746