inffast.c revision 131377
153024Sguido/* inffast.c -- fast decoding
253024Sguido * Copyright (C) 1995-2003 Mark Adler
353024Sguido * For conditions of distribution and use, see copyright notice in zlib.h
453024Sguido */
553024Sguido
653024Sguido#include "zutil.h"
753024Sguido#include "inftrees.h"
853024Sguido#include "inflate.h"
953024Sguido#include "inffast.h"
1053024Sguido
1153024Sguido#ifndef ASMINF
1253024Sguido
1353024Sguido/* Allow machine dependent optimization for post-increment or pre-increment.
1453024Sguido   Based on testing to date,
1553024Sguido   Pre-increment preferred for:
1653024Sguido   - PowerPC G3 (Adler)
1753024Sguido   - MIPS R5000 (Randers-Pehrson)
1853024Sguido   Post-increment preferred for:
1953024Sguido   - none
2053024Sguido   No measurable difference:
2153024Sguido   - Pentium III (Anderson)
2253024Sguido   - 68060 (Nikl)
2353024Sguido */
2453024Sguido#ifdef POSTINC
2553024Sguido#  define OFF 0
2653024Sguido#  define PUP(a) *(a)++
2753024Sguido#else
2853024Sguido#  define OFF 1
2953024Sguido#  define PUP(a) *++(a)
3053024Sguido#endif
3153024Sguido
3253024Sguido/*
3353024Sguido   Decode literal, length, and distance codes and write out the resulting
3453024Sguido   literal and match bytes until either not enough input or output is
3553024Sguido   available, an end-of-block is encountered, or a data error is encountered.
3653024Sguido   When large enough input and output buffers are supplied to inflate(), for
3753024Sguido   example, a 16K input buffer and a 64K output buffer, more than 95% of the
3853024Sguido   inflate execution time is spent in this routine.
3953024Sguido
4053024Sguido   Entry assumptions:
4153024Sguido
4253024Sguido        state->mode == LEN
4353024Sguido        strm->avail_in >= 6
4453024Sguido        strm->avail_out >= 258
4553024Sguido        start >= strm->avail_out
4653024Sguido        state->bits < 8
4753024Sguido
4853024Sguido   On return, state->mode is one of:
4953024Sguido
50        LEN -- ran out of enough output space or enough available input
51        TYPE -- reached end of block code, inflate() to interpret next block
52        BAD -- error in block data
53
54   Notes:
55
56    - The maximum input bits used by a length/distance pair is 15 bits for the
57      length code, 5 bits for the length extra, 15 bits for the distance code,
58      and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
59      Therefore if strm->avail_in >= 6, then there is enough input to avoid
60      checking for available input while decoding.
61
62    - The maximum bytes that a single length/distance pair can output is 258
63      bytes, which is the maximum length that can be coded.  inflate_fast()
64      requires strm->avail_out >= 258 for each loop to avoid checking for
65      output space.
66 */
67void inflate_fast(strm, start)
68z_streamp strm;
69unsigned start;         /* inflate()'s starting value for strm->avail_out */
70{
71    struct inflate_state FAR *state;
72    unsigned char FAR *in;      /* local strm->next_in */
73    unsigned char FAR *last;    /* while in < last, enough input available */
74    unsigned char FAR *out;     /* local strm->next_out */
75    unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */
76    unsigned char FAR *end;     /* while out < end, enough space available */
77    unsigned wsize;             /* window size or zero if not using window */
78    unsigned whave;             /* valid bytes in the window */
79    unsigned write;             /* window write index */
80    unsigned char FAR *window;  /* allocated sliding window, if wsize != 0 */
81    unsigned long hold;         /* local strm->hold */
82    unsigned bits;              /* local strm->bits */
83    code const FAR *lcode;      /* local strm->lencode */
84    code const FAR *dcode;      /* local strm->distcode */
85    unsigned lmask;             /* mask for first level of length codes */
86    unsigned dmask;             /* mask for first level of distance codes */
87    code this;                  /* retrieved table entry */
88    unsigned op;                /* code bits, operation, extra bits, or */
89                                /*  window position, window bytes to copy */
90    unsigned len;               /* match length, unused bytes */
91    unsigned dist;              /* match distance */
92    unsigned char FAR *from;    /* where to copy match from */
93
94    /* copy state to local variables */
95    state = (struct inflate_state FAR *)strm->state;
96    in = strm->next_in - OFF;
97    last = in + (strm->avail_in - 5);
98    out = strm->next_out - OFF;
99    beg = out - (start - strm->avail_out);
100    end = out + (strm->avail_out - 257);
101    wsize = state->wsize;
102    whave = state->whave;
103    write = state->write;
104    window = state->window;
105    hold = state->hold;
106    bits = state->bits;
107    lcode = state->lencode;
108    dcode = state->distcode;
109    lmask = (1U << state->lenbits) - 1;
110    dmask = (1U << state->distbits) - 1;
111
112    /* decode literals and length/distances until end-of-block or not enough
113       input data or output space */
114    do {
115        if (bits < 15) {
116            hold += (unsigned long)(PUP(in)) << bits;
117            bits += 8;
118            hold += (unsigned long)(PUP(in)) << bits;
119            bits += 8;
120        }
121        this = lcode[hold & lmask];
122      dolen:
123        op = (unsigned)(this.bits);
124        hold >>= op;
125        bits -= op;
126        op = (unsigned)(this.op);
127        if (op == 0) {                          /* literal */
128            Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ?
129                    "inflate:         literal '%c'\n" :
130                    "inflate:         literal 0x%02x\n", this.val));
131            PUP(out) = (unsigned char)(this.val);
132        }
133        else if (op & 16) {                     /* length base */
134            len = (unsigned)(this.val);
135            op &= 15;                           /* number of extra bits */
136            if (op) {
137                if (bits < op) {
138                    hold += (unsigned long)(PUP(in)) << bits;
139                    bits += 8;
140                }
141                len += (unsigned)hold & ((1U << op) - 1);
142                hold >>= op;
143                bits -= op;
144            }
145            Tracevv((stderr, "inflate:         length %u\n", len));
146            if (bits < 15) {
147                hold += (unsigned long)(PUP(in)) << bits;
148                bits += 8;
149                hold += (unsigned long)(PUP(in)) << bits;
150                bits += 8;
151            }
152            this = dcode[hold & dmask];
153          dodist:
154            op = (unsigned)(this.bits);
155            hold >>= op;
156            bits -= op;
157            op = (unsigned)(this.op);
158            if (op & 16) {                      /* distance base */
159                dist = (unsigned)(this.val);
160                op &= 15;                       /* number of extra bits */
161                if (bits < op) {
162                    hold += (unsigned long)(PUP(in)) << bits;
163                    bits += 8;
164                    if (bits < op) {
165                        hold += (unsigned long)(PUP(in)) << bits;
166                        bits += 8;
167                    }
168                }
169                dist += (unsigned)hold & ((1U << op) - 1);
170                hold >>= op;
171                bits -= op;
172                Tracevv((stderr, "inflate:         distance %u\n", dist));
173                op = (unsigned)(out - beg);     /* max distance in output */
174                if (dist > op) {                /* see if copy from window */
175                    op = dist - op;             /* distance back in window */
176                    if (op > whave) {
177                        strm->msg = (char *)"invalid distance too far back";
178                        state->mode = BAD;
179                        break;
180                    }
181                    from = window - OFF;
182                    if (write == 0) {           /* very common case */
183                        from += wsize - op;
184                        if (op < len) {         /* some from window */
185                            len -= op;
186                            do {
187                                PUP(out) = PUP(from);
188                            } while (--op);
189                            from = out - dist;  /* rest from output */
190                        }
191                    }
192                    else if (write < op) {      /* wrap around window */
193                        from += wsize + write - op;
194                        op -= write;
195                        if (op < len) {         /* some from end of window */
196                            len -= op;
197                            do {
198                                PUP(out) = PUP(from);
199                            } while (--op);
200                            from = window - OFF;
201                            if (write < len) {  /* some from start of window */
202                                op = write;
203                                len -= op;
204                                do {
205                                    PUP(out) = PUP(from);
206                                } while (--op);
207                                from = out - dist;      /* rest from output */
208                            }
209                        }
210                    }
211                    else {                      /* contiguous in window */
212                        from += write - op;
213                        if (op < len) {         /* some from window */
214                            len -= op;
215                            do {
216                                PUP(out) = PUP(from);
217                            } while (--op);
218                            from = out - dist;  /* rest from output */
219                        }
220                    }
221                    while (len > 2) {
222                        PUP(out) = PUP(from);
223                        PUP(out) = PUP(from);
224                        PUP(out) = PUP(from);
225                        len -= 3;
226                    }
227                    if (len) {
228                        PUP(out) = PUP(from);
229                        if (len > 1)
230                            PUP(out) = PUP(from);
231                    }
232                }
233                else {
234                    from = out - dist;          /* copy direct from output */
235                    do {                        /* minimum length is three */
236                        PUP(out) = PUP(from);
237                        PUP(out) = PUP(from);
238                        PUP(out) = PUP(from);
239                        len -= 3;
240                    } while (len > 2);
241                    if (len) {
242                        PUP(out) = PUP(from);
243                        if (len > 1)
244                            PUP(out) = PUP(from);
245                    }
246                }
247            }
248            else if ((op & 64) == 0) {          /* 2nd level distance code */
249                this = dcode[this.val + (hold & ((1U << op) - 1))];
250                goto dodist;
251            }
252            else {
253                strm->msg = (char *)"invalid distance code";
254                state->mode = BAD;
255                break;
256            }
257        }
258        else if ((op & 64) == 0) {              /* 2nd level length code */
259            this = lcode[this.val + (hold & ((1U << op) - 1))];
260            goto dolen;
261        }
262        else if (op & 32) {                     /* end-of-block */
263            Tracevv((stderr, "inflate:         end of block\n"));
264            state->mode = TYPE;
265            break;
266        }
267        else {
268            strm->msg = (char *)"invalid literal/length code";
269            state->mode = BAD;
270            break;
271        }
272    } while (in < last && out < end);
273
274    /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
275    len = bits >> 3;
276    in -= len;
277    bits -= len << 3;
278    hold &= (1U << bits) - 1;
279
280    /* update state and return */
281    strm->next_in = in + OFF;
282    strm->next_out = out + OFF;
283    strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
284    strm->avail_out = (unsigned)(out < end ?
285                                 257 + (end - out) : 257 - (out - end));
286    state->hold = hold;
287    state->bits = bits;
288    return;
289}
290
291/*
292   inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
293   - Using bit fields for code structure
294   - Different op definition to avoid & for extra bits (do & for table bits)
295   - Three separate decoding do-loops for direct, window, and write == 0
296   - Special case for distance > 1 copies to do overlapped load and store copy
297   - Explicit branch predictions (based on measured branch probabilities)
298   - Deferring match copy and interspersed it with decoding subsequent codes
299   - Swapping literal/length else
300   - Swapping window/direct else
301   - Larger unrolled copy loops (three is about right)
302   - Moving len -= 3 statement into middle of loop
303 */
304
305#endif /* !ASMINF */
306