inffast.c revision 311264
1142425Snectar/* inffast.c -- fast decoding 2160814Ssimon * Copyright (C) 1995-2008, 2010, 2013 Mark Adler 3142425Snectar * For conditions of distribution and use, see copyright notice in zlib.h 4142425Snectar */ 5142425Snectar 6142425Snectar#include "zutil.h" 7142425Snectar#include "inftrees.h" 8142425Snectar#include "inflate.h" 9142425Snectar#include "inffast.h" 10142425Snectar 11142425Snectar#ifndef ASMINF 12142425Snectar 13142425Snectar/* Allow machine dependent optimization for post-increment or pre-increment. 14142425Snectar Based on testing to date, 15142425Snectar Pre-increment preferred for: 16142425Snectar - PowerPC G3 (Adler) 17142425Snectar - MIPS R5000 (Randers-Pehrson) 18142425Snectar Post-increment preferred for: 19142425Snectar - none 20160814Ssimon No measurable difference: 21142425Snectar - Pentium III (Anderson) 22142425Snectar - M68060 (Nikl) 23142425Snectar */ 24142425Snectar#ifdef POSTINC 25142425Snectar# define OFF 0 26142425Snectar# define PUP(a) *(a)++ 27142425Snectar#else 28142425Snectar# define OFF 1 29142425Snectar# define PUP(a) *++(a) 30160814Ssimon#endif 31194206Ssimon 32142425Snectar/* 33142425Snectar Decode literal, length, and distance codes and write out the resulting 34142425Snectar literal and match bytes until either not enough input or output is 35142425Snectar available, an end-of-block is encountered, or a data error is encountered. 36160814Ssimon When large enough input and output buffers are supplied to inflate(), for 37194206Ssimon example, a 16K input buffer and a 64K output buffer, more than 95% of the 38142425Snectar inflate execution time is spent in this routine. 39142425Snectar 40142425Snectar Entry assumptions: 41142425Snectar 42142425Snectar state->mode == LEN 43142425Snectar strm->avail_in >= 6 44142425Snectar strm->avail_out >= 258 45142425Snectar start >= strm->avail_out 46142425Snectar state->bits < 8 47142425Snectar 48142425Snectar On return, state->mode is one of: 49142425Snectar 50142425Snectar LEN -- ran out of enough output space or enough available input 51142425Snectar TYPE -- reached end of block code, inflate() to interpret next block 52142425Snectar BAD -- error in block data 53142425Snectar 54142425Snectar Notes: 55142425Snectar 56142425Snectar - The maximum input bits used by a length/distance pair is 15 bits for the 57142425Snectar length code, 5 bits for the length extra, 15 bits for the distance code, 58142425Snectar and 13 bits for the distance extra. This totals 48 bits, or six bytes. 59142425Snectar Therefore if strm->avail_in >= 6, then there is enough input to avoid 60142425Snectar checking for available input while decoding. 61194206Ssimon 62142425Snectar - The maximum bytes that a single length/distance pair can output is 258 63142425Snectar bytes, which is the maximum length that can be coded. inflate_fast() 64142425Snectar requires strm->avail_out >= 258 for each loop to avoid checking for 65160814Ssimon output space. 66160814Ssimon */ 67160814Ssimonvoid ZLIB_INTERNAL inflate_fast(strm, start) 68160814Ssimonz_streamp strm; 69160814Ssimonunsigned start; /* inflate()'s starting value for strm->avail_out */ 70194206Ssimon{ 71194206Ssimon struct inflate_state FAR *state; 72160814Ssimon z_const unsigned char FAR *in; /* local strm->next_in */ 73160814Ssimon z_const unsigned char FAR *last; /* have enough input while in < last */ 74160814Ssimon unsigned char FAR *out; /* local strm->next_out */ 75160814Ssimon unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ 76160814Ssimon unsigned char FAR *end; /* while out < end, enough space available */ 77194206Ssimon#ifdef INFLATE_STRICT 78194206Ssimon unsigned dmax; /* maximum distance from zlib header */ 79142425Snectar#endif 80160814Ssimon unsigned wsize; /* window size or zero if not using window */ 81160814Ssimon unsigned whave; /* valid bytes in the window */ 82160814Ssimon unsigned wnext; /* window write index */ 83160814Ssimon unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ 84194206Ssimon unsigned long hold; /* local strm->hold */ 85194206Ssimon unsigned bits; /* local strm->bits */ 86142425Snectar code const FAR *lcode; /* local strm->lencode */ 87160814Ssimon code const FAR *dcode; /* local strm->distcode */ 88160814Ssimon unsigned lmask; /* mask for first level of length codes */ 89160814Ssimon unsigned dmask; /* mask for first level of distance codes */ 90160814Ssimon code here; /* retrieved table entry */ 91142425Snectar unsigned op; /* code bits, operation, extra bits, or */ 92160814Ssimon /* window position, window bytes to copy */ 93160814Ssimon unsigned len; /* match length, unused bytes */ 94160814Ssimon unsigned dist; /* match distance */ 95160814Ssimon unsigned char FAR *from; /* where to copy match from */ 96160814Ssimon 97142425Snectar /* copy state to local variables */ 98160814Ssimon state = (struct inflate_state FAR *)strm->state; 99160814Ssimon in = strm->next_in - OFF; 100194206Ssimon last = in + (strm->avail_in - 5); 101194206Ssimon out = strm->next_out - OFF; 102142425Snectar beg = out - (start - strm->avail_out); 103160814Ssimon end = out + (strm->avail_out - 257); 104160814Ssimon#ifdef INFLATE_STRICT 105142425Snectar dmax = state->dmax; 106160814Ssimon#endif 107160814Ssimon wsize = state->wsize; 108160814Ssimon whave = state->whave; 109160814Ssimon wnext = state->wnext; 110160814Ssimon window = state->window; 111160814Ssimon hold = state->hold; 112142425Snectar bits = state->bits; 113160814Ssimon lcode = state->lencode; 114160814Ssimon dcode = state->distcode; 115160814Ssimon lmask = (1U << state->lenbits) - 1; 116160814Ssimon dmask = (1U << state->distbits) - 1; 117160814Ssimon 118160814Ssimon /* decode literals and length/distances until end-of-block or not enough 119194206Ssimon input data or output space */ 120142425Snectar do { 121142425Snectar if (bits < 15) { 122142425Snectar hold += (unsigned long)(PUP(in)) << bits; 123142425Snectar bits += 8; 124142425Snectar hold += (unsigned long)(PUP(in)) << bits; 125142425Snectar bits += 8; 126142425Snectar } 127142425Snectar here = lcode[hold & lmask]; 128142425Snectar dolen: 129142425Snectar op = (unsigned)(here.bits); 130160814Ssimon hold >>= op; 131160814Ssimon bits -= op; 132142425Snectar op = (unsigned)(here.op); 133142425Snectar if (op == 0) { /* literal */ 134142425Snectar Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? 135142425Snectar "inflate: literal '%c'\n" : 136142425Snectar "inflate: literal 0x%02x\n", here.val)); 137142425Snectar PUP(out) = (unsigned char)(here.val); 138142425Snectar } 139142425Snectar else if (op & 16) { /* length base */ 140142425Snectar len = (unsigned)(here.val); 141142425Snectar op &= 15; /* number of extra bits */ 142142425Snectar if (op) { 143142425Snectar if (bits < op) { 144142425Snectar hold += (unsigned long)(PUP(in)) << bits; 145142425Snectar bits += 8; 146142425Snectar } 147142425Snectar len += (unsigned)hold & ((1U << op) - 1); 148142425Snectar hold >>= op; 149142425Snectar bits -= op; 150142425Snectar } 151142425Snectar Tracevv((stderr, "inflate: length %u\n", len)); 152142425Snectar if (bits < 15) { 153142425Snectar hold += (unsigned long)(PUP(in)) << bits; 154160814Ssimon bits += 8; 155142425Snectar hold += (unsigned long)(PUP(in)) << bits; 156142425Snectar bits += 8; 157142425Snectar } 158142425Snectar here = dcode[hold & dmask]; 159142425Snectar dodist: 160142425Snectar op = (unsigned)(here.bits); 161142425Snectar hold >>= op; 162160814Ssimon bits -= op; 163142425Snectar op = (unsigned)(here.op); 164142425Snectar if (op & 16) { /* distance base */ 165142425Snectar dist = (unsigned)(here.val); 166142425Snectar op &= 15; /* number of extra bits */ 167142425Snectar if (bits < op) { 168142425Snectar hold += (unsigned long)(PUP(in)) << bits; 169142425Snectar bits += 8; 170160814Ssimon if (bits < op) { 171160814Ssimon hold += (unsigned long)(PUP(in)) << bits; 172160814Ssimon bits += 8; 173142425Snectar } 174142425Snectar } 175142425Snectar dist += (unsigned)hold & ((1U << op) - 1); 176142425Snectar#ifdef INFLATE_STRICT 177160814Ssimon if (dist > dmax) { 178160814Ssimon strm->msg = (char *)"invalid distance too far back"; 179160814Ssimon state->mode = BAD; 180142425Snectar break; 181142425Snectar } 182142425Snectar#endif 183142425Snectar hold >>= op; 184160814Ssimon bits -= op; 185160814Ssimon Tracevv((stderr, "inflate: distance %u\n", dist)); 186160814Ssimon op = (unsigned)(out - beg); /* max distance in output */ 187160814Ssimon if (dist > op) { /* see if copy from window */ 188160814Ssimon op = dist - op; /* distance back in window */ 189142425Snectar if (op > whave) { 190142425Snectar if (state->sane) { 191142425Snectar strm->msg = 192142425Snectar (char *)"invalid distance too far back"; 193160814Ssimon state->mode = BAD; 194160814Ssimon break; 195160814Ssimon } 196160814Ssimon#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR 197160814Ssimon if (len <= op - whave) { 198160814Ssimon do { 199160814Ssimon PUP(out) = 0; 200160814Ssimon } while (--len); 201160814Ssimon continue; 202160814Ssimon } 203160814Ssimon len -= op - whave; 204142425Snectar do { 205142425Snectar PUP(out) = 0; 206142425Snectar } while (--op > whave); 207142425Snectar if (op == 0) { 208160814Ssimon from = out - dist; 209160814Ssimon do { 210160814Ssimon PUP(out) = PUP(from); 211142425Snectar } while (--len); 212142425Snectar continue; 213142425Snectar } 214142425Snectar#endif 215160814Ssimon } 216160814Ssimon from = window - OFF; 217160814Ssimon if (wnext == 0) { /* very common case */ 218142425Snectar from += wsize - op; 219142425Snectar if (op < len) { /* some from window */ 220142425Snectar len -= op; 221142425Snectar do { 222160814Ssimon PUP(out) = PUP(from); 223160814Ssimon } while (--op); 224160814Ssimon from = out - dist; /* rest from output */ 225142425Snectar } 226142425Snectar } 227142425Snectar else if (wnext < op) { /* wrap around window */ 228142425Snectar from += wsize + wnext - op; 229160814Ssimon op -= wnext; 230160814Ssimon if (op < len) { /* some from end of window */ 231160814Ssimon len -= op; 232142425Snectar do { 233142425Snectar PUP(out) = PUP(from); 234142425Snectar } while (--op); 235142425Snectar from = window - OFF; 236160814Ssimon if (wnext < len) { /* some from start of window */ 237160814Ssimon op = wnext; 238160814Ssimon len -= op; 239160814Ssimon do { 240160814Ssimon PUP(out) = PUP(from); 241160814Ssimon } while (--op); 242160814Ssimon from = out - dist; /* rest from output */ 243160814Ssimon } 244160814Ssimon } 245160814Ssimon } 246160814Ssimon else { /* contiguous in window */ 247160814Ssimon from += wnext - op; 248160814Ssimon if (op < len) { /* some from window */ 249160814Ssimon len -= op; 250160814Ssimon do { 251160814Ssimon PUP(out) = PUP(from); 252160814Ssimon } while (--op); 253142425Snectar from = out - dist; /* rest from output */ 254142425Snectar } 255142425Snectar } 256142425Snectar while (len > 2) { 257160814Ssimon PUP(out) = PUP(from); 258160814Ssimon PUP(out) = PUP(from); 259160814Ssimon PUP(out) = PUP(from); 260142425Snectar len -= 3; 261142425Snectar } 262142425Snectar if (len) { 263142425Snectar PUP(out) = PUP(from); 264160814Ssimon if (len > 1) 265160814Ssimon PUP(out) = PUP(from); 266160814Ssimon } 267142425Snectar } 268142425Snectar else { 269142425Snectar from = out - dist; /* copy direct from output */ 270142425Snectar do { /* minimum length is three */ 271160814Ssimon PUP(out) = PUP(from); 272160814Ssimon PUP(out) = PUP(from); 273160814Ssimon PUP(out) = PUP(from); 274142425Snectar len -= 3; 275142425Snectar } while (len > 2); 276142425Snectar if (len) { 277142425Snectar PUP(out) = PUP(from); 278160814Ssimon if (len > 1) 279160814Ssimon PUP(out) = PUP(from); 280160814Ssimon } 281142425Snectar } 282142425Snectar } 283142425Snectar else if ((op & 64) == 0) { /* 2nd level distance code */ 284142425Snectar here = dcode[here.val + (hold & ((1U << op) - 1))]; 285160814Ssimon goto dodist; 286160814Ssimon } 287160814Ssimon else { 288160814Ssimon strm->msg = (char *)"invalid distance code"; 289160814Ssimon state->mode = BAD; 290160814Ssimon break; 291160814Ssimon } 292160814Ssimon } 293160814Ssimon else if ((op & 64) == 0) { /* 2nd level length code */ 294160814Ssimon here = lcode[here.val + (hold & ((1U << op) - 1))]; 295194206Ssimon goto dolen; 296194206Ssimon } 297194206Ssimon else if (op & 32) { /* end-of-block */ 298194206Ssimon Tracevv((stderr, "inflate: end of block\n")); 299194206Ssimon state->mode = TYPE; 300194206Ssimon break; 301194206Ssimon } 302142425Snectar else { 303142425Snectar strm->msg = (char *)"invalid literal/length code"; 304142425Snectar state->mode = BAD; 305142425Snectar break; 306142425Snectar } 307142425Snectar } while (in < last && out < end); 308142425Snectar 309142425Snectar /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ 310142425Snectar len = bits >> 3; 311142425Snectar in -= len; 312142425Snectar bits -= len << 3; 313142425Snectar hold &= (1U << bits) - 1; 314160814Ssimon 315160814Ssimon /* update state and return */ 316160814Ssimon strm->next_in = in + OFF; 317142425Snectar strm->next_out = out + OFF; 318142425Snectar strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); 319142425Snectar strm->avail_out = (unsigned)(out < end ? 320142425Snectar 257 + (end - out) : 257 - (out - end)); 321142425Snectar state->hold = hold; 322142425Snectar state->bits = bits; 323142425Snectar return; 324142425Snectar} 325142425Snectar 326142425Snectar/* 327142425Snectar inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): 328142425Snectar - Using bit fields for code structure 329160814Ssimon - Different op definition to avoid & for extra bits (do & for table bits) 330160814Ssimon - Three separate decoding do-loops for direct, window, and wnext == 0 331160814Ssimon - Special case for distance > 1 copies to do overlapped load and store copy 332142425Snectar - Explicit branch predictions (based on measured branch probabilities) 333142425Snectar - Deferring match copy and interspersed it with decoding subsequent codes 334142425Snectar - Swapping literal/length else 335142425Snectar - Swapping window/direct else 336160814Ssimon - Larger unrolled copy loops (three is about right) 337160814Ssimon - Moving len -= 3 statement into middle of loop 338160814Ssimon */ 339142425Snectar 340142425Snectar#endif /* !ASMINF */ 341142425Snectar