1#if (defined __i386__) 2 3/* this assembly was 1st compiled from inffast.c (assuming POSTINC defined, OFF=0) and then hand optimized */ 4 5 .cstring 6LC0: 7 .ascii "invalid distance too far back\0" 8LC1: 9 .ascii "invalid distance code\0" 10LC2: 11 .ascii "invalid literal/length code\0" 12 .text 13 .align 4,0x90 14 15 16#ifdef INFLATE_STRICT 17 .byte 0 18 .byte 0 19 .byte 0 20 .byte 0 21 .byte 0 22 .byte 0 23 .byte 0 24 .byte 0 25 .byte 0 26 .byte 0 27#endif 28.globl _inflate_fast 29_inflate_fast: 30 31 // set up ebp to refer to arguments strm and start 32 pushl %ebp 33 movl %esp, %ebp 34 35 // push edi/esi/ebx into stack 36 pushl %edi 37 pushl %esi 38 pushl %ebx 39 40 // allocate for local variables 92-12=80, + 12 to align %esp to 16-byte boundary 41 subl $92, %esp 42 movl 8(%ebp), %ebx 43 44 /* definitions to help code readability */ 45 46 #define bits %edi 47 #define strm %ebx 48 #define state 28(strm) // state = (struct inflate_state FAR *)strm->state; 49 #define in -84(%ebp) // in = strm->next_in - OFF; OFF=0 50 #define last -80(%ebp) // last = in + (strm->avail_in - 5); 51 #define out -28(%ebp) // out = strm->next_out - OFF; 52 #define beg -76(%ebp) // beg = out - (start - strm->avail_out); 53 #define end -72(%ebp) // end = out + (strm->avail_out - 257); 54 #define wsize -68(%ebp) // wsize = state->wsize; 55 #define whave -64(%ebp) // whave = state->whave; 56 #define write -60(%ebp) // write = state->write; 57 #define window -56(%ebp) // window = state->window; 58 #define hold -52(%ebp) // hold = state->hold; 59 #define lcode -48(%ebp) // lcode = state->lencode; 60 #define dcode -44(%ebp) // dcode = state->distcode; 61 #define lmask -40(%ebp) // lmask = (1U << state->lenbits) - 1; 62 #define dmask -36(%ebp) // dmask = (1U << state->distbits) - 1; 63 #define len -32(%ebp) 64 #define dmax -20(%ebp) 65 #define dist -16(%ebp) // dist 66 #define write_wsize -24(%ebp) // write+wsize 67 #define write_1 -88(%ebp) // write-1 68 #define op -92(%ebp) // op 69 70 movl 
(strm), %eax // strm->next_in 71 movl %eax, in // in = strm->next_in - OFF; OFF=0 72 73 subl $5, %eax // in - 5; 74 movl 4(strm), %ecx // strm->avail_in 75 addl %ecx, %eax // in + (strm->avail_in - 5); 76 movl %eax, last // last = in + (strm->avail_in - 5); 77 78 movl 12(strm), %esi // strm->next_out 79 movl %esi, out // out = strm->next_out - OFF; 80 81 movl 16(strm), %ecx // strm->avail_out 82 movl %esi, %eax // out 83 subl 12(%ebp), %eax // out - start 84 addl %ecx, %eax // out - (start - strm->avail_out); 85 movl %eax, beg // beg = out - (start - strm->avail_out); 86 87 leal -257(%esi,%ecx), %ecx // out + (strm->avail_out - 257); 88 movl %ecx, end // end = out + (strm->avail_out - 257); 89 90 movl state, %edx 91 92#ifdef INFLATE_STRICT 93 movl 20(%edx), %ecx // state->dmax 94 movl %ecx, dmax // dmax = state->dmax; 95#endif 96 97 movl 40(%edx), %ecx // state->wsize 98 movl %ecx, wsize // wsize = state->wsize; 99 100 movl 44(%edx), %ecx // state->whave 101 movl %ecx, whave // whave = state->whave; 102 103 movl 48(%edx), %esi // state->write 104 movl %esi, write // write = state->write; 105 106 movl 52(%edx), %eax // state->window 107 movl %eax, window // window = state->window; 108 109 110 movl 56(%edx), %ecx // state->hold 111 movl %ecx, hold // hold = state->hold 112 113 movl 60(%edx), bits // bits = state->bits; 114 115 movl 76(%edx), %esi // state->lencode 116 movl %esi, lcode // lcode = state->lencode; 117 118 movl 80(%edx), %eax // state->distcode 119 movl %eax, dcode // dcode = state->distcode; 120 121 movl 84(%edx), %ecx // state->lenbits 122 movl $1, %eax 123 movl %eax, %esi // a copy of 1 124 sall %cl, %esi // 1 << state->lenbits 125 decl %esi // (1U << state->lenbits) - 1; 126 movl %esi, lmask // lmask = (1U << state->lenbits) - 1; 127 128 movl 88(%edx), %ecx // state->distbits 129 sall %cl, %eax // 1 << state->distbits 130 decl %eax // (1U << state->distbits) - 1; 131 movl %eax, dmask // dmask = (1U << state->distbits) - 1; 132 133 134 // these 2 
might be used often, precomputed and saved in stack 135 movl write, %eax 136 addl wsize, %eax 137 movl %eax, write_wsize // write+wsize 138 139 movl write, %edx 140 decl %edx 141 movl %edx, write_1 // write-1 142 143 144L_do_while_loop: // do { 145 146 cmpl $15, bits 147 jae bits_ge_15 // if (bits < 15) { 148#if 0 149 leal 8(bits), %esi // esi = bits+8 150 movl in, %eax // eax = in 151 movzbl (%eax), %edx // edx = *in++ 152 movl bits, %ecx // cl = bits 153 sall %cl, %edx // 1st *in << bits 154 addl hold, %edx // hold += 1st *in << bits 155 movzbl 1(%eax), %eax // 2nd *in 156 movl %esi, %ecx // cl = bits+8 157 sall %cl, %eax // 2nd *in << (bits+8) 158 addl %eax, %edx // hold += 2nd *in << (bits+8) 159 movl %edx, hold // update hold 160 addl $2, in // in += 2 161 addl $16, bits // bits += 16; 162#else 163 /* from simulation, this code segment performs better than the other case 164 possibly, we are more often hit with aligned memory access */ 165 movl in, %ecx // unsigned short *inp = (unsigned short *) (in+OFF); 166 movzwl (%ecx), %eax // *((unsigned short *) in); 167 movl bits, %ecx // bits 168 sall %cl, %eax // *((unsigned short *) in) << bits 169 addl %eax, hold // hold += (unsigned long) *((unsigned short *) in) << bits; 170 addl $2, in // in += 2; 171 addl $16, bits // bits += 16; 172#endif 173 174bits_ge_15: // } /* bits < 15 */ 175 176 movl hold, %eax // hold 177 andl lmask, %eax // hold & lmask; 178 movl lcode, %esi // lcode[] : 4-byte aligned 179 movl (%esi,%eax,4), %eax // this = lcode[hold&lmask]; 180 jmp dolen 181 .align 4,0x90 182op_nonzero: 183 movzbl %al, %ecx // a copy of op to cl 184 testb $16, %cl // if op&16 185 jne Llength_base // branch to length_base 186 187 testb $64, %cl // elif op&64 188 jne length_2nd_level_else // branch to 2nd level length code else conditions 189 190 // 2nd level length code 191 192 movl $1, %eax 193 sall %cl, %eax // 1 << op 194 decl %eax // ((1<<op) - 1) 195 andl hold, %eax // hold & ((1U << op) - 1) 196 movzwl %si, 
%ecx // this.val 197 addl %ecx, %eax // this.val + (hold & ((1U << op) - 1)) 198 199 movl lcode, %ecx // lcode[] : 4-byte aligned 200 movl (%ecx,%eax,4), %eax // this = lcode[this.val + (hold & ((1U << op) - 1))]; 201 // goto dolen (compiler rearranged the order of code) 202dolen: 203 movl %eax, %esi // make a copy of this (val 16-bit, bits 8-bit, op 8-bit) 204 shrl $16, %esi // %esi = this.val; 205 movzbl %ah, %ecx // op = (unsigned)(this.bits); 206 shrl %cl, hold // hold >>= op; 207 subl %ecx, bits // bits -= op; 208 testb %al, %al // op = (unsigned)(this.op); 209 jne op_nonzero // if op!=0, branch to op_nonzero 210 211 movl %esi, %ecx // this.val; 212 movl out, %eax // out 213 movb %cl, (%eax) // PUP(out) = (unsigned char)(this.val); 214 incl %eax // out++; 215 movl %eax, out // save out 216 217L_tst_do_while_loop_end: 218 movl last, %eax // last 219 cmpl %eax, in // in vs last 220 jae return_unused_bytes // branch to return_unused_bytes if in >= last 221 movl end, %edx // end 222 cmpl %edx, out // out vs end 223 jb L_do_while_loop // branch to do loop if out < end 224 225return_unused_bytes: 226 227 movl bits, %eax // bits 228 shrl $3, %eax // len = bits >> 3 229 movl in, %edx // in 230 subl %eax, %edx // in -= len 231 sall $3, %eax // len << 3 232 movl bits, %ecx // bits 233 subl %eax, %ecx // bits -= len << 3 234 235 movl %edx, (strm) // strm->next_in = in + OFF; 236 movl out, %eax 237 movl %eax, 12(strm) // strm->next_out = out + OFF; 238 239 cmpl %edx, last // last vs in 240 jbe L67 // if (last <= in) branch to L67 and return to L69 241 movl last, %eax // last 242 addl $5, %eax // 5 + last 243 subl %edx, %eax // 5 + last - in 244L69: 245 movl %eax, 4(strm) // update strm->avail_in 246 247 movl end, %eax 248 cmpl %eax, out // out vs end 249 jae L70 // if (out>=end) branch to L70, and return to L72 250 addl $257, %eax // 257 + end 251 subl out, %eax // 257 + end - out 252L72: 253 movl %eax, 16(strm) // update strm->avail_out 254 255 movl $1, %eax 256 sall 
%cl, %eax // 1 << bits 257 decl %eax // (1 << bits) -1 258 andl hold, %eax // hold &= (1U << bits) - 1; 259 movl state, %esi 260 movl %eax, 56(%esi) // state->hold = hold; 261 movl %ecx, 60(%esi) // state->bits = bits; 262 263 addl $92, %esp // pop out local from stack 264 265 // restore saved registers and return 266 popl %ebx 267 popl %esi 268 popl %edi 269 leave 270 ret 271 272 // this code segment is branched in from op_nonzero, with op in cl and this.value in esi 273Llength_base: 274 movzwl %si, %esi // this instruction might not be needed, pad here to give better performance 275 movl %esi, len // len = (unsigned)(this.val); 276 277 movl %ecx, %esi // leave a copy of op at ecx 278 andl $15, %esi // op&=15; 279 je Lop_is_zero // if (op) { 280 cmpl bits, %esi // op vs bits 281 jbe Lop_be_bits // if (bits < op) { 282 movl in, %edx // in 283 movzbl (%edx), %eax // *in 284 movl bits, %ecx // bits 285 sall %cl, %eax // *in << bits 286 addl %eax, hold // hold += (unsigned long)(PUP(in)) << bits; 287 incl %edx // in++ 288 movl %edx, in // update in 289 addl $8, bits // bits += 8 290Lop_be_bits: // } 291 movl $1, %eax // 1 292 movl %esi, %ecx // op 293 sall %cl, %eax // 1 << op 294 decl %eax // (1<<op)-1 295 andl hold, %eax // hold & ((1U << op) - 1) 296 addl %eax, len // len += (unsigned)hold & ((1U << op) - 1); 297 shrl %cl, hold // hold >>= op; 298 subl %esi, bits // bits -= op; 299Lop_is_zero: // } 300 cmpl $14, bits // if (bits < 15) { 301 jbe bits_le_14 // branch to refill 16-bit into hold, and branch back to next 302L19: // } 303 movl hold, %eax // hold 304 andl dmask, %eax // hold&dmask 305 movl dcode, %esi // dcode[] : 4-byte aligned 306 movl (%esi,%eax,4), %eax // this = dcode[hold & dmask]; 307 jmp dodist 308 309Lop_16_zero: 310 testb $64, %cl // op&64 311 jne Linvalid_distance_code // if (op&64)!=0, branch to invalid distance code 312 movl $1, %eax // 1 313 sall %cl, %eax // (1<<op) 314 decl %eax // (1<<op)-1 315 andl hold, %eax // (hold & ((1U << op) - 1)) 
316 movzwl %dx, %edx // this.val 317 addl %edx, %eax // this.val + (hold & ((1U << op) - 1)) 318 movl dcode, %edx // dcode[] : 4 byte aligned 319 movl (%edx,%eax,4), %eax // this = dcode[this.val + (hold & ((1U << op) - 1))]; 320dodist: 321 movl %eax, %edx // this : (val 16-bit, bits 8-bit, op 8-bit) 322 shrl $16, %edx // edx = this.val 323 movzbl %ah, %ecx // op = (unsigned)(this.bits); 324 shrl %cl, hold // hold >>= op; 325 subl %ecx, bits // bits -= op; 326 movzbl %al, %ecx // op = (unsigned)(this.op); 327 testb $16, %cl // op & 16 328 je Lop_16_zero // if (op&16)==0 goto test op&64 329 330Ldistance_base: // if (op&16) { /* distance base */ 331 andl $15, %ecx // op &= 15; edx = dist = this.val; 332 movl %ecx, op // save a copy of op 333 cmpl bits, %ecx // op vs bits 334 jbe 0f // if (bits < op) { 335 movl in, %ecx // in 336 movzbl (%ecx), %eax // *in 337 movl bits, %ecx // bits 338 sall %cl, %eax // *in << bits 339 addl %eax, hold // hold += (unsigned long)(PUP(in)) << bits; 340 incl in // in++ 341 addl $8, bits // bits += 8 342 cmpl bits, op // op vs bits 343 jbe 0f // if (bits < op) { 344 movl in, %esi // i 345 movzbl (%esi), %eax // *in 346 movl bits, %ecx // cl = bits 347 sall %cl, %eax // *in << bits 348 addl %eax, hold // hold += (unsigned long)(PUP(in)) << bits; 349 incl %esi // in++ 350 movl %esi, in // update in 351 addl $8, bits // bits += 8 3520: // } } 353 354 movzwl %dx, %edx // dist = (unsigned)(this.val); 355 movl $1, %eax // 1 356 movzbl op, %ecx // cl = op 357 sall %cl, %eax // 1 << op 358 decl %eax // ((1U << op) - 1) 359 andl hold, %eax // (unsigned)hold & ((1U << op) - 1) 360 addl %edx, %eax // dist += (unsigned)hold & ((1U << op) - 1); 361 362#ifdef INFLATE_STRICT 363 364 cmpl dmax, %eax // dist vs dmax 365 ja Linvalid_distance_too_far_back // if (dist > dmax) break for invalid distance too far back 366 367#endif 368 369 movl %eax, dist // save a copy of dist in stack 370 shrl %cl, hold // hold >>= op; 371 subl %ecx, bits // bits -= op; 372 
373 movl out, %eax 374 subl beg, %eax // eax = op = out - beg 375 cmpl %eax, dist // dist vs op 376 jbe Lcopy_direct_from_output // if (dist <= op) branch to copy direct from output 377 378 // if (dist > op) { 379 movl dist, %ecx // dist 380 subl %eax, %ecx // esi = op = dist - op; 381 cmpl %ecx, whave // whave vs op 382 jb Linvalid_distance_too_far_back // if (op > whave) break for error; 383 384 movl write, %edx 385 testl %edx, %edx 386 jne Lwrite_non_zero // if (write==0) { 387 movl wsize, %eax // wsize 388 subl %ecx, %eax // wsize-op 389 movl window, %esi // from=window-OFF 390 addl %eax, %esi // from += wsize-op 391 movl out, %edx // out 392 cmpl %ecx, len // len vs op 393 jbe L38 // if !(op < len) skip 394 subl %ecx, len // len - op 3950: // do { 396 movzbl (%esi), %eax // 397 movb %al, (%edx) // 398 incl %edx // 399 incl %esi // PUP(out) = PUP(from); 400 decl %ecx // --op; 401 jne 0b // } while (op); 402 403 movl %edx, out // update out 404 movl %edx, %esi // out 405 subl dist, %esi // esi = from = out - dist; 406 407L38: /* copy from output */ 408 409 // while (len > 2) { 410 // PUP(out) = PUP(from); 411 // PUP(out) = PUP(from); 412 // PUP(out) = PUP(from); 413 // len -= 3; 414 // } 415 // if (len) { 416 // PUP(out) = PUP(from); 417 // if (len > 1) 418 // PUP(out) = PUP(from); 419 // } 420 421 movl len, %ecx // len 422 movl out, %edx // out 423 subl $3, %ecx // pre-decrement len by 3 424 jl 1f // if len < 3, branch to 1f for remaining processing 4250: // while (len>2) { 426 movzbl (%esi), %eax 427 movb %al, (%edx) // PUP(out) = PUP(from); 428 movzbl 1(%esi), %eax 429 movb %al, 1(%edx) // PUP(out) = PUP(from); 430 movzbl 2(%esi), %eax 431 movb %al, 2(%edx) // PUP(out) = PUP(from); 432 addl $3, %esi // from += 3; 433 addl $3, %edx // out += 3; 434 subl $3, %ecx // len -= 3; 435 jge 0b // } 436 movl %edx, out // update out, in case len == 0 4371: 438 addl $3, %ecx // post-increment len by 3 439 je L_tst_do_while_loop_end // if (len) { 440 movzbl (%esi), %eax 
// 441 movb %al, (%edx) // PUP(out) = PUP(from); 442 incl %edx // out++ 443 movl %edx, out // update out, in case len == 1 444 cmpl $2, %ecx // 445 jne L_tst_do_while_loop_end // if len==1, break 446 movzbl 1(%esi), %eax 447 movb %al, (%edx) // PUP(out) = PUP(from); 448 incl %edx // out++ 449 movl %edx, out // update out 450 jmp L_tst_do_while_loop_end // } 451 452 .align 4,0x90 453length_2nd_level_else: 454 andl $32, %ecx // test end-of-block 455 je invalid_literal_length_code // if (op&32)==0, branch for invalid literal/length code break 456 movl state, %edx // if (op&32), end-of-block is detected 457 movl $11, (%edx) // state->mode = TYPE 458 jmp return_unused_bytes 459 460L70: 461 movl out, %edx // out 462 subl %edx, end // (end-out) 463 movl end, %esi // %esi = (end-out) = -(out - end); 464 leal 257(%esi), %eax // %eax = 257 + %esi = 257 - (out -end) 465 jmp L72 // return to update state and return 466 467L67: // %edx = in, to return 5 - (in - last) in %eax 468 subl %edx, last // last - in 469 movl last, %edx // %edx = last - in = - (in - last); 470 leal 5(%edx), %eax // %eax = 5 + %edx = 5 - (in - last); 471 jmp L69 // return to update state and return 472 473bits_le_14: 474#if 1 475 leal 8(bits), %esi // esi = bits+8 476 movl in, %eax // eax = in 477 movzbl (%eax), %edx // edx = *in++ 478 movl bits, %ecx // cl = bits 479 sall %cl, %edx // 1st *in << bits 480 addl hold, %edx // hold += 1st *in << bits 481 movzbl 1(%eax), %eax // 2nd *in 482 movl %esi, %ecx // cl = bits+8 483 sall %cl, %eax // 2nd *in << (bits+8) 484 addl %eax, %edx // hold += 2nd *in << (bits+8) 485 movl %edx, hold // update hold 486 addl $2, in // in += 2 487 addl $16, bits // bits += 16; 488 jmp L19 489#else 490 /* this code segment does not run as fast as the other original code segment, possibly the processor 491 need extra time to handle unaligned short access */ 492 movl in, %edx // unsigned short *inp = (unsigned short *) (in+OFF); 493 movzwl (%edx), %eax // *((unsigned short *) in); 
494 movl bits, %ecx // bits 495 sall %cl, %eax // *((unsigned short *) in) << bits 496 addl %eax, hold // hold += (unsigned long) *((unsigned short *) in) << bits; 497 addl $2, %edx // in += 2; 498 addl $16, %ecx // bits += 16; 499 movl %edx, in 500 movl %ecx, bits 501 jmp L19 502#endif 503invalid_literal_length_code: 504 call 0f 5050: popl %eax 506 leal LC2-0b(%eax), %eax 507 movl %eax, 24(strm) 508 movl state, %esi 509 movl $27, (%esi) 510 jmp return_unused_bytes 511Linvalid_distance_code: 512 call 0f 5130: popl %eax 514 leal LC1-0b(%eax), %eax 515 movl %eax, 24(strm) 516 movl state, %eax 517 movl $27, (%eax) 518 jmp return_unused_bytes 519 520#ifdef INFLATE_STRICT 521 .align 4,0x90 522 .byte 0 523 .byte 0 524 .byte 0 525 .byte 0 526 .byte 0 527 .byte 0 528 .byte 0 529 .byte 0 530 .byte 0 531#endif 532Lcopy_direct_from_output: 533 movl out, %edx // out 534 subl dist, %edx // from = out - dist 535 movl out, %ecx // out 536 movl len, %esi // len 537 subl $3, %esi // pre-decement len by 3 5380: // do { 539 movzbl (%edx), %eax 540 movb %al, (%ecx) // PUP(out) = PUP(from); 541 movzbl 1(%edx), %eax 542 movb %al, 1(%ecx) // PUP(out) = PUP(from); 543 movzbl 2(%edx), %eax 544 movb %al, 2(%ecx) // PUP(out) = PUP(from); 545 addl $3, %edx // from += 3 546 addl $3, %ecx // out += 3 547 subl $3, %esi // len -= 3 548 jge 0b // } while (len > 2); 549 movl %ecx, out // update out in case len == 0 550 addl $3, %esi // post-increment len by 3 551 je L_tst_do_while_loop_end // if (len) { 552 movzbl (%edx), %eax 553 movb %al, (%ecx) // PUP(out) = PUP(from); 554 incl %ecx 555 movl %ecx, out // out++ 556 cmpl $2, %esi // 557 jne L_tst_do_while_loop_end // if (len>2) 558 movzbl 1(%edx), %eax 559 movb %al, (%ecx) // PUP(out) = PUP(from); 560 incl %ecx 561 movl %ecx, out // out++ 562 jmp L_tst_do_while_loop_end // } 563 564 .align 4,0x90 565Lwrite_non_zero: // %edx = write, %ecx = op 566 movl window, %esi // from = window - OFF; 567 cmp %ecx, %edx // write vs op, test for wrap around 
window or contiguous in window 568 jae Lcontiguous_in_window // if (write >= op) branch to contiguous in window 569 570Lwrap_around_window: // wrap around window 571 addl write_wsize, %esi // from += write+wsize 572 subl %ecx, %esi // from += wsize + write - op; 573 subl %edx, %ecx // op -= write 574 cmpl %ecx, len // len vs op 575 jbe L38 // if (len <= op) break to copy from output 576 subl %ecx, len // len -= op; 577 movl out, %edx // out 5780: // do { 579 movzbl (%esi), %eax // *from 580 movb %al, (%edx) // *out 581 incl %esi // from++ 582 incl %edx // out++ 583 decl %ecx // --op 584 jne 0b // } while (op); 585 586 movl %edx, out // save out in case we need to break to L38 587 movl window, %esi // from = window - OFF; 588 movl len, %eax // len 589 cmpl %eax, write // write vs len 590 jae L38 // if (write >= len) break to L38 591 592 movl write, %ecx // op = write 593 subl %ecx, len // len -= op; 5940: // do { 595 movzbl (%esi), %eax // *from 596 movb %al, (%edx) // *out 597 incl %esi // from++ 598 incl %edx // out++ 599 decl %ecx // --op 600 jne 0b // } while (op); 601 602 movl %edx, %esi // from = out 603 movl %edx, out // save a copy of out 604 subl dist, %esi // from = out - dist; 605 jmp L38 // break to copy from output 606 607Lcontiguous_in_window: // contiguous in window, edx = write, %ecx = op 608 subl %ecx, %edx // write - op 609 addl %edx, %esi // from += write - op; 610 cmpl %ecx, len // len vs op 611 jbe L38 // if (len <= op) break to copy from output 612 movl out, %edx // out 613 subl %ecx, len // len -= op; 614 6150: // do { 616 movzbl (%esi), %eax // *from 617 movb %al, (%edx) // *out 618 incl %esi // from++ 619 incl %edx // out++ 620 decl %ecx // op-- 621 jne 0b // } while (op); 622 623 movl %edx, out // update out 624 movl %edx, %esi // from = out 625 subl dist, %esi // from = out - dist; 626 jmp L38 627 628Linvalid_distance_too_far_back: 629 call 0f 6300: popl %eax 631 leal LC0-0b(%eax), %eax 632 movl %eax, 24(strm) 633 movl state, %ecx 634 movl 
$27, (%ecx) 635 jmp return_unused_bytes 636 637#endif 638 639#if (defined __x86_64__) 640 .cstring 641LC0: 642 .ascii "invalid distance too far back\0" 643LC1: 644 .ascii "invalid distance code\0" 645LC2: 646 .ascii "invalid literal/length code\0" 647 .text 648 .align 4,0x90 649 650#ifdef INFLATE_STRICT 651 .byte 0 652 .byte 0 653 .byte 0 654 .byte 0 655 .byte 0 656 .byte 0 657 .byte 0 658 .byte 0 659 .byte 0 660 .byte 0 661 .byte 0 662 .byte 0 663#endif 664 665.globl _inflate_fast 666_inflate_fast: 667 668 // set up rbp 669 pushq %rbp 670 movq %rsp, %rbp 671 672 // save registers in stack 673 pushq %r15 674 pushq %r14 675 pushq %r13 676 pushq %r12 677 pushq %rbx 678 679 #define strm %r13 680 #define state %rdi 681 #define in %r12 682 #define in_d %r12d 683 #define out %r10 684 #define out_d %r10d 685 #define write %r15d 686 #define hold %r9 687 #define holdd %r9d 688 #define bits %r8d 689 #define lcode %r14 690 #define len %ebx 691 #define from %rcx 692 #define dmax %r11d 693 694 #define last -104(%rbp) 695 #define beg -96(%rbp) 696 #define end -88(%rbp) 697 #define wsize -80(%rbp) 698 #define whave -76(%rbp) 699 #define window -72(%rbp) 700 #define dcode -64(%rbp) 701 #define lmask -56(%rbp) 702 #define dmask -112(%rbp) 703 #define wsize_write -116(%rbp) 704 #define write_1 -128(%rbp) 705 #define dist -44(%rbp) 706 707 // reserve stack memory for local variables 128-40=88 708 subq $88, %rsp 709 710 movq %rdi, strm 711 movq 56(%rdi), state // state = (struct inflate_state FAR *)strm->state; 712 movq (strm), in // in = strm->next_in - OFF; 713 movl 8(strm), %eax // strm->avail_in 714 subl $5, %eax // (strm->avail_in - 5) 715 addq in, %rax // in + (strm->avail_in - 5) 716 movq %rax, last // last = in + (strm->avail_in - 5) 717 movq 24(strm), out // out = strm->next_out 718 movl 32(strm), %eax // strm->avail_out 719 subl %eax, %esi // (start - strm->avail_out); 720 movq out, %rdx // strm->next_out 721 subq %rsi, %rdx // out - (start - strm->avail_out); 722 movq %rdx, 
beg // beg = out - (start - strm->avail_out); 723 subl $257, %eax // (strm->avail_out - 257) 724 addq out, %rax // out + (strm->avail_out - 257); 725 movq %rax, end // end = out + (strm->avail_out - 257); 726 727#ifdef INFLATE_STRICT 728 movl 20(state), dmax // dmax = state->dmax; 729#endif 730 731 movl 52(state), %ecx // state->wsize 732 movl %ecx, wsize // wsize = state->wsize; 733 movl 56(state), %ebx // state->whave; 734 movl %ebx, whave // whave = state->whave; 735 movl 60(state), write // write = state->write; 736 movq 64(state), %rax // state->window 737 movq %rax, window // window = state->window; 738 movq 72(state), hold // hold = state->hold; 739 movl 80(state), bits // bits = state->bits; 740 741 movq 96(state), lcode // lcode = state->lencode; 742 movq 104(state), %rdx // state->distcode; 743 movq %rdx, dcode // dcode = state->distcode; 744 745 movl 116(state), %ecx // state->distbits 746 movl $1, %eax 747 movl %eax, %edx // 1 748 sall %cl, %edx // (1U << state->distbits) 749 movl 112(state), %ecx // state->lenbits 750 sall %cl, %eax // (1U << state->lenbits) 751 decl %eax // (1U << state->lenbits) - 1 752 movq %rax, lmask // lmask = (1U << state->lenbits) - 1 753 decl %edx // (1U << state->distbits) - 1 754 movq %rdx, dmask // dmask = (1U << state->distbits) - 1 755 756 movl wsize, %ecx // wsize 757 addl write, %ecx // wsize + write 758 movl %ecx, wsize_write // wsize_write = wsize + write 759 760 leal -1(%r15), %ebx // write - 1 761 movq %rbx, write_1 // write_1 = write - 1 762 763L_do_while_loop: 764 cmpl $14, bits // bits vs 14 765 ja 0f // if (bits < 15) { 766 movzwl (in), %eax // read 2 bytes from in 767 movl bits, %ecx // set up cl = bits 768 salq %cl, %rax // (*in) << bits 769 addq %rax, hold // hold += (*in) << bits 770 addq $2, in // in += 2 771 addl $16, bits // bits += 16 7720: // } 773 movq lmask, %rax // lmask 774 andq hold, %rax // hold & lmask 775 jmp 1f 776 .align 4,0x90 777Lop_nonzero: 778 movzbl %al, %ecx // op in al and cl 779 testb 
$16, %cl // check for length base processing (op&16) 780 jne L_length_base // if (op&16) branch to length base processing 781 testb $64, %cl // check for 2nd level length code (op&64==0) 782 jne L_end_of_block // if (op&64)!=0, branch for end-of-block processing 783 784 /* 2nd level length code : (op&64) == 0*/ 785L_2nd_level_length_code: 786 movl $1, %eax // 1 787 sall %cl, %eax // 1 << op 788 decl %eax // ((1U << op) - 1) 789 andq hold, %rax // (hold & ((1U << op) - 1)) 790 movzwl %dx, %edx 791 addq %rdx, %rax // this = lcode[this.val + (hold & ((1U << op) - 1))]; 7921: 793 movl (lcode,%rax,4), %eax // this = lcode[hold & lmask]; 794Ldolen: 795 movl %eax, %edx // a copy of this 796 shrl $16, %edx // edx = this.val; 797 movzbl %ah, %ecx // op = this.bits 798 shrq %cl, hold // hold >>= op; 799 subl %ecx, bits // bits -= op; 800 testb %al, %al // op = (unsigned)(this.op); 801 jne Lop_nonzero // if (op!-0) branch for copy operation 802L_literal: 803 movb %dl, (out) // *out = this.val 804 incq out // out ++ 805L_do_while_loop_check: 806 cmpq last, in // in vs last 807 jae L_return_unused_byte // if in >= last, break to return unused byte processing 808 cmpq end, out // out vs end 809 jb L_do_while_loop // back to do_while_loop if out < end 810 811 /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ 812 813L_return_unused_byte: 814 movl out_d, %esi 815 jmp L34 816 817L_length_base: /* al = cl = op, edx = this.val, op&16 = 16 */ 818 movzwl %dx, len // len = (unsigned)(this.val); 819 movl %ecx, %edx // op 820 andl $15, %edx // op &= 15; 821 je 1f // if (op) { 822 cmpl bits, %edx // op vs bits 823 jbe 0f // if (bits < op) { 824 movzbl (in), %eax // *in 825 movl bits, %ecx // cl = bits 826 salq %cl, %rax // *in << bits 827 addq %rax, hold // hold += (unsigned long)(PUP(in)) << bits; 828 incq in // in++ 829 addl $8, bits // bits += 8 8300: // } 831 movl $1, %eax // 1 832 movl %edx, %ecx // cl = op 833 sall %cl, %eax // 1 << op 834 decl %eax // (1 << 
op) - 1 835 andl holdd, %eax // (unsigned)hold & ((1U << op) - 1); 836 addl %eax, len // len += (unsigned)hold & ((1U << op) - 1); 837 shrq %cl, hold // hold >>= op; 838 subl %edx, bits // bits -= op; 8391: // } 840 cmpl $14, bits // bits vs 14 841 jbe L99 // if (bits < 15) go to loading to hold and return to L19 842L19: // } 843 movq dmask, %rax // dmask 844 andq hold, %rax // hold & dmask 845 movq dcode, %rdx // dcode[] 846 movl (%rdx,%rax,4), %eax // this = dcode[hold & dmask]; 847 jmp L_dodist 848 .align 4,0x90 8490: // op&16 == 0, test (op&64)==0 for 2nd level distance code 850 testb $64, %cl // op&64 851 jne L_invalid_distance_code // if ((op&64)==0) { /* 2nd level distance code */ 852 movl $1, %eax // 1 853 sall %cl, %eax // 1 << op 854 decl %eax // (1 << op) - 1 855 andq hold, %rax // (hold & ((1U << op) - 1)) 856 movzwl %dx, %edx // this.val 857 addq %rdx, %rax // this.val + (hold & ((1U << op) - 1)) 858 movq dcode, %rcx // dcode[] 859 movl (%rcx,%rax,4), %eax // this = dcode[this.val + (hold & ((1U << op) - 1))]; 860L_dodist: 861 movl %eax, %edx // this 862 shrl $16, %edx // dist = (unsigned)(this.val); 863 movzbl %ah, %ecx // cl = op = this.bits 864 shrq %cl, hold // hold >>= op; 865 subl %ecx, bits // bits -= op; 866 movzbl %al, %ecx // op = (unsigned)(this.op); 867 testb $16, %cl // (op & 16) test for distance base 868 je 0b // if (op&16) == 0, branch to check for 2nd level distance code 869 870L_distance_base: /* distance base */ 871 872 movl %ecx, %esi // op 873 andl $15, %esi // op&=15 874 cmpl bits, %esi // op vs bits 875 jbe 1f // if (bits < op) { 876 movzbl (in), %eax // *in 877 movl bits, %ecx // cl = bits 878 salq %cl, %rax // *in << bits 879 addq %rax, hold // hold += (unsigned long)(PUP(in)) << bits; 880 incq in // in++ 881 addl $8, bits // bits += 8 882 cmpl bits, %esi // op vs bits 883 jbe 1f // if (bits < op) { 884 movzbl (in), %eax // *in 885 movl bits, %ecx // cl = bits 886 salq %cl, %rax // *in << bits 887 addq %rax, hold // hold += 
(unsigned long)(PUP(in)) << bits; 888 incq in // in++ 889 addl $8, bits // bits += 8 8901: // } } 891 892 movzwl %dx, %edx // dist 893 movl $1, %eax // 1 894 movl %esi, %ecx // cl = op 895 sall %cl, %eax // (1 << op) 896 decl %eax // (1 << op) - 1 897 andl holdd, %eax // (unsigned)hold & ((1U << op) - 1) 898 addl %edx, %eax // dist += (unsigned)hold & ((1U << op) - 1); 899 movl %eax, dist // save a copy of dist in stack 900 901#ifdef INFLATE_STRICT 902 cmp %eax, dmax // dmax vs dist 903 jb L_invalid_distance_too_far_back // if (dmax < dist) break for invalid distance too far back 904#endif 905 906 shrq %cl, hold // hold >>= op; 907 subl %esi, bits // bits -= op; 908 movl out_d, %esi // out 909 movl out_d, %eax // out 910 subl beg, %eax // op = out - beg 911 cmpl %eax, dist // dist vs op, /* see if copy from window */ 912 jbe L_copy_direct_from_output // if (dist <= op) branch to copy direct from output 913 914L_distance_back_in_window: 915 916 movl dist, %edx // dist 917 subl %eax, %edx // op = dist - op; /* distance back in window */ 918 919 cmpl %edx, whave // whave vs op 920 jb L_invalid_distance_too_far_back // if (op > whave), break for invalid distance too far back 921 922 testl write, write // if (write!=0) 923 jne L_wrap_around_window // branch to wrap around window 924 925L_very_common_case: 926 927 movl wsize, %eax // wsize 928 subl %edx, %eax // wsize - op 929 movq window, from // from = window - OFF; 930 addq %rax, from // from += wsize - op; 931 932 movl %edx, %esi // op 933 cmpl %edx, len // len vs op 934 ja L_some_from_window // if (len > op), branch for aligned code block L_some_from_window 935L38: 936 subl $3, len // pre-decrement len by 3 937 jge 0f // if len >= 3, branch to the aligned code block 9381: addl $3, len // post-increment len by 3 939 je L_do_while_loop_check // if (len==0) break to L_do_while_loop_check 940 movzbl (from), %eax // *from 941 movb %al, (out) // *out 942 incq out // out++ 943 cmpl $2, len // len vs 2 944 jne 
L_do_while_loop_check // if len!=2 break to L_do_while_loop_check 945 movzbl 1(from), %eax // *from 946 movb %al, (out) // *out 947 incq out // out++ 948 jmp L_do_while_loop_check // break to L_do_while_loop_check 949 950 .align 4,0x90 9510: // do { 952 movzbl (from), %eax // *from 953 movb %al, (out) // *out 954 movzbl 1(from), %eax // *from 955 movb %al, 1(out) // *out 956 movzbl 2(from), %eax // *from 957 movb %al, 2(out) // *out 958 addq $3, out // out += 3 959 addq $3, from // from += 3 960 subl $3, len // len -= 3 961 jge 0b // } while (len>=0); 962 jmp 1b // branch back to the possibly unaligned code 963 964 .align 4,0x90 965L_end_of_block: 966 andl $32, %ecx // op & 32 967 jne L101 // if (op&32) branch to end-of-block break 968 leaq LC2(%rip), from 969 movq from, 48(strm) // state->mode 970 movl $27, (state) // state->mode = BAD; 971 movl out_d, %esi 972 973L34: 974 movl bits, %eax // bits 975 shrl $3, %eax // len = bits >> 3; 976 mov %eax, %edx // len 977 subq %rdx, in // in -= len 978 sall $3, %eax // len << 3 979 movl bits, %ecx // bits 980 subl %eax, %ecx // bits -= len << 3 981 movq in, (strm) // strm->next_in = in + OFF; 982 movq out, 24(strm) // strm->next_out = out + OFF; 983 cmpq in, last // last vs in 984 jbe L67 // if (last <= in) branch to L67 and return to L69 985 movl last, %eax // last 986 addl $5, %eax // last + 5 987 subl in_d, %eax // 5 + last - in 988L69: 989 movl %eax, 8(strm) // update strm->avail_in 990 991 cmpq end, out // out vs end 992 jae L70 // if out<=end branch to L70 and return to L72 993 movl end, %eax // end 994 addl $257, %eax // 257 + end 995 subl %esi, %eax // 257 + end - out; 996L72: 997 movl %eax, 32(strm) // update strm->avail_out 998 999 movl $1, %eax // 1 1000 sall %cl, %eax // 1 << bits 1001 decl %eax // (1U << bits) - 1 1002 andq hold, %rax // hold &= (1U << bits) - 1; 1003 movq %rax, 72(state) // state->hold = hold; 1004 movl %ecx, 80(state) // state->bits = bits; 1005 1006 // clear stack memory for local variables 
1007 addq $88, %rsp 1008 1009 // restore registers from stack 1010 popq %rbx 1011 popq %r12 1012 popq %r13 1013 popq %r14 1014 popq %r15 1015 1016 // return to caller 1017 leave 1018 ret 1019 1020 .align 4,0x90 1021L99: 1022 leal 8(bits), %esi // esi = bits+8 1023 movzbl (in), %edx // 1st *in 1024 movl bits, %ecx // cl = bits 1025 salq %cl, %rdx // 1st *in << 8 1026 addq %rdx, hold // 1st hold += (unsigned long)(PUP(in)) << bits; 1027 movzbl 1(in), %eax // 2nd *in 1028 movl %esi, %ecx // cl = bits + 8 1029 salq %cl, %rax // 2nd *in << bits+8 1030 addq %rax, hold // 2nd hold += (unsigned long)(PUP(in)) << bits; 1031 addq $2, in // in += 2 1032 addl $16, bits // bits += 16 1033 jmp L19 1034 1035L101: 1036 movl $11, (state) 1037 movl out_d, %esi 1038 jmp L34 1039 .align 4,0x90 1040L70: 1041 movl end, %eax // end 1042 subl %esi, %eax // end - out 1043 addl $257, %eax // 257 + end - out 1044 jmp L72 1045 .align 4,0x90 1046L67: 1047 movl last, %eax // last 1048 subl in_d, %eax // last - in 1049 addl $5, %eax // 5 + last - in 1050 jmp L69 1051 1052 1053 .align 4,0x90 1054 1055 // stuffing the following 4 bytes to align the major loop to a 16-byte boundary to give the better performance 1056 .byte 0 1057 .byte 0 1058 .byte 0 1059 .byte 0 1060L_copy_direct_from_output: 1061 mov dist, %eax // dist 1062 movq out, %rdx // out 1063 subq %rax, %rdx // from = out - dist; 1064 subl $3, len // pre-decrement len by 3 1065 // do { 10660: movzbl (%rdx), %eax // *from 1067 movb %al, (out) // *out 1068 movzbl 1(%rdx), %eax // *from 1069 movb %al, 1(out) // *out 1070 movzbl 2(%rdx), %eax // *from 1071 movb %al, 2(out) // *out 1072 addq $3, out // out+=3 1073 addq $3, %rdx // from+=3 1074 subl $3, len // len-=3 1075 jge 0b // } while (len>=0); 10761: addl $3, len // post-increment len by 3 1077 je L_do_while_loop_check // if len==0, branch to do_while_loop_check 1078 1079 movzbl (%rdx), %eax // *from 1080 movb %al, (out) // *out 1081 incq out // out++ 1082 cmpl $2, len // len == 2 ? 
// Tail of L_copy_direct_from_output: the flags tested here come from the
// preceding "cmpl $2, len".
    jne     L_do_while_loop_check       // if len==1, branch to do_while_loop_check

    movzbl  1(%rdx), %eax               // *from (second leftover byte)
    movb    %al, (out)                  // *out
    incq    out                         // out++
    jmp     L_do_while_loop_check       // branch to do_while_loop_check

    .align 4,0x90
    // Copy op bytes from the window, then continue from the output buffer.
    // On entry: from = source in window, %esi and %edx both hold op.
L_some_from_window:                     // from : from, out, %esi/%edx = op
                                        // do {
    movzbl  (from), %eax                //   *from
    movb    %al, (out)                  //   *out
    incq    from                        //   from++
    incq    out                         //   out++
    decl    %esi                        //   --op
    jne     L_some_from_window          // } while (op);
    subl    %edx, len                   // len -= op; (%edx still holds the original op)
    mov     dist, %eax                  // dist (zero-extended into %rax)
    movq    out, from                   // out
    subq    %rax, from                  // from = out - dist;
    jmp     L38                         // copy from output

    .align 4,0x90
    // Source may wrap around the end of the window. %edx = op on entry.
L_wrap_around_window:
    cmpl    %edx, write                 // write vs op
    jae     L_contiguous_in_window      // if (write >= op) branch to contiguous in window
    movl    wsize_write, %eax           // wsize+write
    subl    %edx, %eax                  // wsize+write-op
    movq    window, from                // from = window - OFF
    addq    %rax, from                  // from += wsize+write-op
    subl    write, %edx                 // op -= write
    cmpl    %edx, len                   // len vs op
    jbe     L38                         // if (len<=op) branch to copy from output

    subl    %edx, len                   // len -= op;
0:                                      // do {  (some from end of window)
    movzbl  (from), %eax                //   *from
    movb    %al, (out)                  //   *out
    incq    from                        //   from++
    incq    out                         //   out++
    decl    %edx                        //   op--
    jne     0b                          // } while (op);
    movq    window, from                // from = window (restart at the window's beginning)

    cmpl    len, write                  // write vs len
    jae     L38                         // if (write >= len) branch to copy from output
    movl    write, %esi                 // op = write
    subl    write, len                  // len -= op
1:                                      // do {  (some from start of window)
    movzbl  (from), %eax                //   *from
    movb    %al, (out)                  //   *out
    incq    from                        //   from++
    incq    out                         //   out++
    decl    %esi                        //   op--
    jne     1b                          // } while (op);
    mov     dist, %eax                  // dist (zero-extended into %rax)
    movq    out, from                   // out
    subq    %rax, from                  // from = out - dist;
    jmp     L38                         // copy the rest from output

    .align 4,0x90
    // Source lies in one piece inside the window. %edx = op on entry.
L_contiguous_in_window:
    movl    write, %eax                 // write
    subl    %edx, %eax                  // write - op
    movq    window, from                // from = window - OFF
    addq    %rax, from                  // from += write - op
    cmpl    %edx, len                   // len vs op
    jbe     L38                         // if (len <= op) branch to copy from output
    subl    %edx, len                   // len -= op;
2:                                      // do {
    movzbl  (from), %eax                //   *from
    movb    %al, (out)                  //   *out
    incq    from                        //   from++
    incq    out                         //   out++
    decl    %edx                        //   op--
    jne     2b                          // } while (op);

    mov     dist, %eax                  // dist (zero-extended into %rax)
    movq    out, from                   // out
    subq    %rax, from                  // from = out - dist;
    jmp     L38                         // copy from output

    .align 4,0x90
    // Error exit: record the LC1 message, mark the state BAD, leave via L34.
L_invalid_distance_code:
    leaq    LC1(%rip), %rdx             // address of the LC1 message string
    movq    %rdx, 48(strm)              // error message
    movl    $27, (state)                // state->mode = BAD
    movl    out_d, %esi                 // esi = out; L72/L70 subtract %esi from end
    jmp     L34

    // Error exit: record the LC0 message, mark the state BAD, leave via L34.
    // %rbx is used as scratch; the epilogue reloads it with popq %rbx.
L_invalid_distance_too_far_back:
    leaq    LC0(%rip), %rbx             // address of the LC0 message string
    movq    %rbx, 48(strm)              // error message
    movl    $27, (state)                // state->mode = BAD
    movl    out_d, %esi                 // esi = out, as on the other paths into L34:
                                        // L72/L70 derive strm->avail_out from %esi
    jmp     L34

#endif
