#if (defined __i386__)

/* this assembly was 1st compiled from inffast.c (assuming POSTINC defined, OFF=0) and then hand optimized */

//-----------------------------------------------------------------------------
// _inflate_fast -- i386 version (AT&T syntax, run through the C preprocessor)
//
// Stack arguments (read below): 8(%ebp) = strm, 12(%ebp) = start.
// Saves and restores %ebp, %edi, %esi, %ebx.  The only calls are the
// "call 0f / popl %eax" pairs used to obtain the PC for the message strings.
// Structure fields are addressed through hard-coded offsets; the field names
// in the comments are taken from the original annotations.
//
// NOTE(review): in this copy three spans of the i386 code were destroyed (the
// text between a '<' and the next '>' is gone).  They are marked with
// NOTE(review) below and left exactly as found; the labels `dolen` and
// `dodist`, which are jump targets, are not defined anywhere in what is
// left.  Restore those spans from the pristine file before assembling for i386.
//-----------------------------------------------------------------------------

	.cstring
LC0:
	.ascii "invalid distance too far back\0"
LC1:
	.ascii "invalid distance code\0"
LC2:
	.ascii "invalid literal/length code\0"

	.text
	.align 4,0x90

#ifdef INFLATE_STRICT
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
#endif

.globl _inflate_fast
_inflate_fast:

	// set up ebp to refer to arguments strm and start
	pushl	%ebp
	movl	%esp, %ebp

	// push edi/esi/ebx into stack
	pushl	%edi
	pushl	%esi
	pushl	%ebx

	// allocate for local variables 92-12=80, + 12 to align %esp to 16-byte boundary
	subl	$92, %esp
	movl	8(%ebp), %ebx

	/* definitions to help code readability */

#define bits %edi
#define strm %ebx
#define state 28(strm) // state = (struct inflate_state FAR *)strm->state;
#define in -84(%ebp) // in = strm->next_in - OFF; OFF=0
#define last -80(%ebp) // last = in + (strm->avail_in - 5);
#define out -28(%ebp) // out = strm->next_out - OFF;
#define beg -76(%ebp) // beg = out - (start - strm->avail_out);
#define end -72(%ebp) // end = out + (strm->avail_out - 257);
#define wsize -68(%ebp) // wsize = state->wsize;
#define whave -64(%ebp) // whave = state->whave;
#define write -60(%ebp) // write = state->write;
#define window -56(%ebp) // window = state->window;
#define hold -52(%ebp) // hold = state->hold;
#define lcode -48(%ebp) // lcode = state->lencode;
#define dcode -44(%ebp) // dcode = state->distcode;
#define lmask -40(%ebp) // lmask = (1U << state->lenbits) - 1;
#define dmask -36(%ebp) // dmask = (1U << state->distbits) - 1;
#define len -32(%ebp)
#define dmax -20(%ebp)
#define dist -16(%ebp) // dist
#define write_wsize -24(%ebp) // write+wsize
#define write_1 -88(%ebp) // write-1
#define op -92(%ebp) // op

	movl	(strm), %eax	// strm->next_in
	movl	%eax, in	// in = strm->next_in - OFF; OFF=0

	subl	$5, %eax	// in - 5;
	movl	4(strm), %ecx	// strm->avail_in
	addl	%ecx, %eax	// in + (strm->avail_in - 5);
	movl	%eax, last	// last = in + (strm->avail_in - 5);

	movl	12(strm), %esi	// strm->next_out
	movl	%esi, out	// out = strm->next_out - OFF;

	movl	16(strm), %ecx	// strm->avail_out
	movl	%esi, %eax	// out
	subl	12(%ebp), %eax	// out - start
	addl	%ecx, %eax	// out - (start - strm->avail_out);
	movl	%eax, beg	// beg = out - (start - strm->avail_out);

	leal	-257(%esi,%ecx), %ecx	// out + (strm->avail_out - 257);
	movl	%ecx, end	// end = out + (strm->avail_out - 257);

	movl	state, %edx

#ifdef INFLATE_STRICT
	movl	20(%edx), %ecx	// state->dmax
	movl	%ecx, dmax	// dmax = state->dmax;
#endif

	movl	40(%edx), %ecx	// state->wsize
	movl	%ecx, wsize	// wsize = state->wsize;

	movl	44(%edx), %ecx	// state->whave
	movl	%ecx, whave	// whave = state->whave;

	movl	48(%edx), %esi	// state->write
	movl	%esi, write	// write = state->write;

	movl	52(%edx), %eax	// state->window
	movl	%eax, window	// window = state->window;

	movl	56(%edx), %ecx	// state->hold
	movl	%ecx, hold	// hold = state->hold

	movl	60(%edx), bits	// bits = state->bits;

	movl	76(%edx), %esi	// state->lencode
	movl	%esi, lcode	// lcode = state->lencode;

	movl	80(%edx), %eax	// state->distcode
	movl	%eax, dcode	// dcode = state->distcode;

	movl	84(%edx), %ecx	// state->lenbits
	movl	$1, %eax
	movl	%eax, %esi	// a copy of 1
	sall	%cl, %esi	// 1 << state->lenbits
	decl	%esi	// (1U << state->lenbits) - 1;
	movl	%esi, lmask	// lmask = (1U << state->lenbits) - 1;

	movl	88(%edx), %ecx	// state->distbits
	sall	%cl, %eax	// 1 << state->distbits
	decl	%eax	// (1U << state->distbits) - 1;
	movl	%eax, dmask	// dmask = (1U << state->distbits) - 1;

	// these 2 might be used often, precomputed and saved in stack
	movl	write, %eax
	addl	wsize, %eax
	movl	%eax, write_wsize	// write+wsize

	movl	write, %edx
	decl	%edx
	movl	%edx, write_1	// write-1

L_do_while_loop:	// do {

	cmpl	$15, bits
	jae	bits_ge_15	// if (bits < 15) {
#if 0
	leal	8(bits), %esi	// esi = bits+8
	movl	in, %eax	// eax = in
	movzbl	(%eax), %edx	// edx = *in++
	movl	bits, %ecx	// cl = bits
	sall	%cl, %edx	// 1st *in << bits
	addl	hold, %edx	// hold += 1st *in << bits
	movzbl	1(%eax), %eax	// 2nd *in
	movl	%esi, %ecx	// cl = bits+8
	sall	%cl, %eax	// 2nd *in << (bits+8)
	addl	%eax, %edx	// hold += 2nd *in << (bits+8)
	movl	%edx, hold	// update hold
	addl	$2, in	// in += 2
	addl	$16, bits	// bits += 16;
#else
	/* from simulation, this code segment performs better than the other case
	   possibly, we are more often hit with aligned memory access */
	movl	in, %ecx	// unsigned short *inp = (unsigned short *) (in+OFF);
	movzwl	(%ecx), %eax	// *((unsigned short *) in);
	movl	bits, %ecx	// bits
	sall	%cl, %eax	// *((unsigned short *) in) << bits
	addl	%eax, hold	// hold += (unsigned long) *((unsigned short *) in) << bits;
	addl	$2, in	// in += 2;
	addl	$16, bits	// bits += 16;
#endif

bits_ge_15:	// } /* bits < 15 */

	movl	hold, %eax	// hold
	andl	lmask, %eax	// hold & lmask;
	movl	lcode, %esi	// lcode[] : 4-byte aligned
	movl	(%esi,%eax,4), %eax	// this = lcode[hold&lmask];
	jmp	dolen
	.align 4,0x90

op_nonzero:
	movzbl	%al, %ecx	// a copy of op to cl
	testb	$16, %cl	// if op&16
	jne	Llength_base	// branch to length_base

	testb	$64, %cl	// elif op&64
	jne	length_2nd_level_else	// branch to 2nd level length code else conditions

	// 2nd level length code

	movl	$1, %eax
	sall	%cl, %eax	// 1 << op
	// NOTE(review): text was lost between the next line and "subl %ecx, bits".
	// The label `dolen` (target of "jmp dolen" above) is missing; the code that
	// follows reads the table entry in %al, this.val in %esi and the bit count
	// in %ecx, so the lost span presumably set those up -- restore from upstream.
	decl	%eax	// ((1<>= op;
	subl	%ecx, bits	// bits -= op;
	testb	%al, %al	// op = (unsigned)(this.op);
	jne	op_nonzero	// if op!=0, branch to op_nonzero

	movl	%esi, %ecx	// this.val;
	movl	out, %eax	// out
	movb	%cl, (%eax)	// PUP(out) = (unsigned char)(this.val);
	incl	%eax	// out++;
	movl	%eax, out	// save out

L_tst_do_while_loop_end:
	movl	last, %eax	// last
	cmpl	%eax, in	// in vs last
	jae	return_unused_bytes	// branch to return_unused_bytes if in >= last
	movl	end, %edx	// end
	cmpl	%edx, out	// out vs end
	jb	L_do_while_loop	// branch to do loop if out < end

return_unused_bytes:

	movl	bits, %eax	// bits
	shrl	$3, %eax	// len = bits >> 3
	movl	in, %edx	// in
	subl	%eax, %edx	// in -= len
	sall	$3, %eax	// len << 3
	movl	bits, %ecx	// bits
	subl	%eax, %ecx	// bits -= len << 3

	movl	%edx, (strm)	// strm->next_in = in + OFF;

	movl	out, %eax
	movl	%eax, 12(strm)	// strm->next_out = out + OFF;

	cmpl	%edx, last	// last vs in
	jbe	L67	// if (last <= in) branch to L67 and return to L69
	movl	last, %eax	// last
	addl	$5, %eax	// 5 + last
	subl	%edx, %eax	// 5 + last - in
L69:
	movl	%eax, 4(strm)	// update strm->avail_in

	movl	end, %eax
	cmpl	%eax, out	// out vs end
	jae	L70	// if (out>=end) branch to L70, and return to L72
	addl	$257, %eax	// 257 + end
	subl	out, %eax	// 257 + end - out
L72:
	movl	%eax, 16(strm)	// update strm->avail_out

	movl	$1, %eax
	sall	%cl, %eax	// 1 << bits
	decl	%eax	// (1 << bits) -1
	andl	hold, %eax	// hold &= (1U << bits) - 1;
	movl	state, %esi
	movl	%eax, 56(%esi)	// state->hold = hold;
	movl	%ecx, 60(%esi)	// state->bits = bits;

	addl	$92, %esp	// pop out local from stack

	// restore saved registers and return
	popl	%ebx
	popl	%esi
	popl	%edi
	leave
	ret

	// this code segment is branched in from op_nonzero, with op in cl and this.value in esi
Llength_base:
	movzwl	%si, %esi	// this instruction might not be needed, pad here to give better performance
	movl	%esi, len	// len = (unsigned)(this.val);

	movl	%ecx, %esi	// leave a copy of op at ecx
	andl	$15, %esi	// op&=15;
	je	Lop_is_zero	// if (op) {
	cmpl	bits, %esi	// op vs bits
	jbe	Lop_be_bits	// if (bits < op) {
	movl	in, %edx	// in
	movzbl	(%edx), %eax	// *in
	movl	bits, %ecx	// bits
	sall	%cl, %eax	// *in << bits
	addl	%eax, hold	// hold += (unsigned long)(PUP(in)) << bits;
	incl	%edx	// in++
	movl	%edx, in	// update in
	addl	$8, bits	// bits += 8
Lop_be_bits:	// }
	movl	$1, %eax	// 1
	movl	%esi, %ecx	// op
	sall	%cl, %eax	// 1 << op
	// NOTE(review): text was lost between the next line and "subl %esi, bits".
	// Nothing that is left adds the extra bits to len or shifts hold, so the
	// lost span presumably did both -- restore from upstream.
	decl	%eax	// (1<>= op;
	subl	%esi, bits	// bits -= op;
Lop_is_zero:	// }
	cmpl	$14, bits	// if (bits < 15) {
	jbe	bits_le_14	// branch to refill 16-bit into hold, and branch back to next
L19:	// }
	movl	hold, %eax	// hold
	andl	dmask, %eax	// hold&dmask
	movl	dcode, %esi	// dcode[] : 4-byte aligned
	movl	(%esi,%eax,4), %eax	// this = dcode[hold & dmask];
	jmp	dodist

Lop_16_zero:
	testb	$64, %cl	// op&64
	jne	Linvalid_distance_code	// if (op&64)!=0, branch to invalid distance code
	movl	$1, %eax	// 1
	// NOTE(review): text was lost between the next line and "subl %ecx, bits".
	// The label `dodist` (target of "jmp dodist" above) is missing, and the code
	// from Ldistance_base on expects this.val in %dx -- restore from upstream.
	sall	%cl, %eax	// (1<>= op;
	subl	%ecx, bits	// bits -= op;
	movzbl	%al, %ecx	// op = (unsigned)(this.op);
	testb	$16, %cl	// op & 16
	je	Lop_16_zero	// if (op&16)==0 goto test op&64

Ldistance_base:	// if (op&16) { /* distance base */
	andl	$15, %ecx	// op &= 15; edx = dist = this.val;
	movl	%ecx, op	// save a copy of op
	cmpl	bits, %ecx	// op vs bits
	jbe	0f	// if (bits < op) {
	movl	in, %ecx	// in
	movzbl	(%ecx), %eax	// *in
	movl	bits, %ecx	// bits
	sall	%cl, %eax	// *in << bits
	addl	%eax, hold	// hold += (unsigned long)(PUP(in)) << bits;
	incl	in	// in++
	addl	$8, bits	// bits += 8
	cmpl	bits, op	// op vs bits
	jbe	0f	// if (bits < op) {
	movl	in, %esi	// in
	movzbl	(%esi), %eax	// *in
	movl	bits, %ecx	// cl = bits
	sall	%cl, %eax	// *in << bits
	addl	%eax, hold	// hold += (unsigned long)(PUP(in)) << bits;
	incl	%esi	// in++
	movl	%esi, in	// update in
	addl	$8, bits	// bits += 8
0:	// } }

	movzwl	%dx, %edx	// dist = (unsigned)(this.val);
	movl	$1, %eax	// 1
	movzbl	op, %ecx	// cl = op
	sall	%cl, %eax	// 1 << op
	decl	%eax	// ((1U << op) - 1)
	andl	hold, %eax	// (unsigned)hold & ((1U << op) - 1)
	addl	%edx, %eax	// dist += (unsigned)hold & ((1U << op) - 1);

#ifdef INFLATE_STRICT
	cmpl	dmax, %eax	// dist vs dmax
	ja	Linvalid_distance_too_far_back	// if (dist > dmax) break for invalid distance too far back
#endif

	movl	%eax, dist	// save a copy of dist in stack
	shrl	%cl, hold	// hold >>= op;
	subl	%ecx, bits	// bits -= op;

	movl	out, %eax
	subl	beg, %eax	// eax = op = out - beg
	cmpl	%eax, dist	// dist vs op
	jbe	Lcopy_direct_from_output	// if (dist <= op) branch to copy direct from output

	// if (dist > op) {
	movl	dist, %ecx	// dist
	subl	%eax, %ecx	// ecx = op = dist - op;
	cmpl	%ecx, whave	// whave vs op
	jb	Linvalid_distance_too_far_back	// if (op > whave) break for error;

	movl	write, %edx
	testl	%edx, %edx
	jne	Lwrite_non_zero	// if (write==0) {
	movl	wsize, %eax	// wsize
	subl	%ecx, %eax	// wsize-op
	movl	window, %esi	// from=window-OFF
	addl	%eax, %esi	// from += wsize-op
	movl	out, %edx	// out
	cmpl	%ecx, len	// len vs op
	jbe	L38	// if !(op < len) skip
	subl	%ecx, len	// len - op
0:	// do {
	movzbl	(%esi), %eax	//
	movb	%al, (%edx)	//
	incl	%edx	//
	incl	%esi	// PUP(out) = PUP(from);
	decl	%ecx	// --op;
	jne	0b	// } while (op);

	movl	%edx, out	// update out
	movl	%edx, %esi	// out
	subl	dist, %esi	// esi = from = out - dist;

L38:	/* copy from output */

	// while (len > 2) {
	//	PUP(out) = PUP(from);
	//	PUP(out) = PUP(from);
	//	PUP(out) = PUP(from);
	//	len -= 3;
	// }
	// if (len) {
	//	PUP(out) = PUP(from);
	//	if (len > 1)
	//		PUP(out) = PUP(from);
	// }

	movl	len, %ecx	// len
	movl	out, %edx	// out
	subl	$3, %ecx	// pre-decrement len by 3
	jl	1f	// if len < 3, branch to 1f for remaining processing
0:	// while (len>2) {
	movzbl	(%esi), %eax
	movb	%al, (%edx)	// PUP(out) = PUP(from);
	movzbl	1(%esi), %eax
	movb	%al, 1(%edx)	// PUP(out) = PUP(from);
	movzbl	2(%esi), %eax
	movb	%al, 2(%edx)	// PUP(out) = PUP(from);
	addl	$3, %esi	// from += 3;
	addl	$3, %edx	// out += 3;
	subl	$3, %ecx	// len -= 3;
	jge	0b	// }
	movl	%edx, out	// update out, in case len == 0
1:
	addl	$3, %ecx	// post-increment len by 3
	je	L_tst_do_while_loop_end	// if (len) {
	movzbl	(%esi), %eax	//
	movb	%al, (%edx)	// PUP(out) = PUP(from);
	incl	%edx	// out++
	movl	%edx, out	// update out, in case len == 1
	cmpl	$2, %ecx	//
	jne	L_tst_do_while_loop_end	// if len==1, break
	movzbl	1(%esi), %eax
	movb	%al, (%edx)	// PUP(out) = PUP(from);
	incl	%edx	// out++
	movl	%edx, out	// update out
	jmp	L_tst_do_while_loop_end	// }

	.align 4,0x90
length_2nd_level_else:
	andl	$32, %ecx	// test end-of-block
	je	invalid_literal_length_code	// if (op&32)==0, branch for invalid literal/length code break
	movl	state, %edx	// if (op&32), end-of-block is detected
	movl	$11, (%edx)	// state->mode = TYPE
	jmp	return_unused_bytes

L70:
	movl	out, %edx	// out
	subl	%edx, end	// (end-out)
	movl	end, %esi	// %esi = (end-out) = -(out - end);
	leal	257(%esi), %eax	// %eax = 257 + %esi = 257 - (out -end)
	jmp	L72	// return to update state and return

L67:	// %edx = in, to return 5 - (in - last) in %eax
	subl	%edx, last	// last - in
	movl	last, %edx	// %edx = last - in = - (in - last);
	leal	5(%edx), %eax	// %eax = 5 + %edx = 5 - (in - last);
	jmp	L69	// return to update state and return

bits_le_14:
#if 1
	leal	8(bits), %esi	// esi = bits+8
	movl	in, %eax	// eax = in
	movzbl	(%eax), %edx	// edx = *in++
	movl	bits, %ecx	// cl = bits
	sall	%cl, %edx	// 1st *in << bits
	addl	hold, %edx	// hold += 1st *in << bits
	movzbl	1(%eax), %eax	// 2nd *in
	movl	%esi, %ecx	// cl = bits+8
	sall	%cl, %eax	// 2nd *in << (bits+8)
	addl	%eax, %edx	// hold += 2nd *in << (bits+8)
	movl	%edx, hold	// update hold
	addl	$2, in	// in += 2
	addl	$16, bits	// bits += 16;
	jmp	L19
#else
	/* this code segment does not run as fast as the other original code segment,
	   possibly the processor need extra time to handle unaligned short access */
	movl	in, %edx	// unsigned short *inp = (unsigned short *) (in+OFF);
	movzwl	(%edx), %eax	// *((unsigned short *) in);
	movl	bits, %ecx	// bits
	sall	%cl, %eax	// *((unsigned short *) in) << bits
	addl	%eax, hold	// hold += (unsigned long) *((unsigned short *) in) << bits;
	addl	$2, %edx	// in += 2;
	addl	$16, %ecx	// bits += 16;
	movl	%edx, in
	movl	%ecx, bits
	jmp	L19
#endif

	// error exits: "call 0f / popl %eax" fetches the current PC so the message
	// address can be formed PC-relative; 24(strm) receives the message pointer
	// and 27 is stored to the first word of *state before the common exit.
invalid_literal_length_code:
	call	0f
0:
	popl	%eax
	leal	LC2-0b(%eax), %eax
	movl	%eax, 24(strm)
	movl	state, %esi
	movl	$27, (%esi)
	jmp	return_unused_bytes

Linvalid_distance_code:
	call	0f
0:
	popl	%eax
	leal	LC1-0b(%eax), %eax
	movl	%eax, 24(strm)
	movl	state, %eax
	movl	$27, (%eax)
	jmp	return_unused_bytes

#ifdef INFLATE_STRICT
	.align 4,0x90
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
#endif

Lcopy_direct_from_output:
	movl	out, %edx	// out
	subl	dist, %edx	// from = out - dist
	movl	out, %ecx	// out
	movl	len, %esi	// len
	subl	$3, %esi	// pre-decrement len by 3
0:	// do {
	movzbl	(%edx), %eax
	movb	%al, (%ecx)	// PUP(out) = PUP(from);
	movzbl	1(%edx), %eax
	movb	%al, 1(%ecx)	// PUP(out) = PUP(from);
	movzbl	2(%edx), %eax
	movb	%al, 2(%ecx)	// PUP(out) = PUP(from);
	addl	$3, %edx	// from += 3
	addl	$3, %ecx	// out += 3
	subl	$3, %esi	// len -= 3
	jge	0b	// } while (len > 2);
	movl	%ecx, out	// update out in case len == 0
	addl	$3, %esi	// post-increment len by 3
	je	L_tst_do_while_loop_end	// if (len) {
	movzbl	(%edx), %eax
	movb	%al, (%ecx)	// PUP(out) = PUP(from);
	incl	%ecx
	movl	%ecx, out	// out++
	cmpl	$2, %esi	//
	jne	L_tst_do_while_loop_end	// if len==1, done
	movzbl	1(%edx), %eax
	movb	%al, (%ecx)	// PUP(out) = PUP(from);
	incl	%ecx
	movl	%ecx, out	// out++
	jmp	L_tst_do_while_loop_end	// }

	.align 4,0x90
Lwrite_non_zero:	// %edx = write, %ecx = op
	movl	window, %esi	// from = window - OFF;
	cmp	%ecx, %edx	// write vs op, test for wrap around window or contiguous in window
	jae	Lcontiguous_in_window	// if (write >= op) branch to contiguous in window

Lwrap_around_window:	// wrap around window
	addl	write_wsize, %esi	// from += write+wsize
	subl	%ecx, %esi	// from += wsize + write - op;
	subl	%edx, %ecx	// op -= write
	cmpl	%ecx, len	// len vs op
	jbe	L38	// if (len <= op) break to copy from output
	subl	%ecx, len	// len -= op;
	movl	out, %edx	// out
0:	// do {
	movzbl	(%esi), %eax	// *from
	movb	%al, (%edx)	// *out
	incl	%esi	// from++
	incl	%edx	// out++
	decl	%ecx	// --op
	jne	0b	// } while (op);

	movl	%edx, out	// save out in case we need to break to L38
	movl	window, %esi	// from = window - OFF;
	movl	len, %eax	// len
	cmpl	%eax, write	// write vs len
	jae	L38	// if (write >= len) break to L38

	movl	write, %ecx	// op = write
	subl	%ecx, len	// len -= op;
0:	// do {
	movzbl	(%esi), %eax	// *from
	movb	%al, (%edx)	// *out
	incl	%esi	// from++
	incl	%edx	// out++
	decl	%ecx	// --op
	jne	0b	// } while (op);

	movl	%edx, %esi	// from = out
	movl	%edx, out	// save a copy of out
	subl	dist, %esi	// from = out - dist;
	jmp	L38	// break to copy from output

Lcontiguous_in_window:	// contiguous in window, edx = write, %ecx = op
	subl	%ecx, %edx	// write - op
	addl	%edx, %esi	// from += write - op;
	cmpl	%ecx, len	// len vs op
	jbe	L38	// if (len <= op) break to copy from output
	movl	out, %edx	// out
	subl	%ecx, len	// len -= op;

0:	// do {
	movzbl	(%esi), %eax	// *from
	movb	%al, (%edx)	// *out
	incl	%esi	// from++
	incl	%edx	// out++
	decl	%ecx	// op--
	jne	0b	// } while (op);

	movl	%edx, out	// update out
	movl	%edx, %esi	// from = out
	subl	dist, %esi	// from = out - dist;
	jmp	L38

Linvalid_distance_too_far_back:
	call	0f
0:
	popl	%eax
	leal	LC0-0b(%eax), %eax
	movl	%eax, 24(strm)
	movl	state, %ecx
	movl	$27, (%ecx)
	jmp	return_unused_bytes

#endif

#if (defined __x86_64__)

//-----------------------------------------------------------------------------
// _inflate_fast -- x86_64 version (AT&T syntax, run through the C preprocessor)
//
// In:    %rdi = strm, %esi = start (consumed once, when beg is computed).
// Out:   nothing is returned in a register; the results are stored back into
//        *strm (offsets 0, 8, 24, 32 and, on error, 48) and into *state
//        (offsets 0, 72, 80).
// Saves: %rbp, %r15, %r14, %r13, %r12, %rbx are pushed on entry and popped
//        in reverse order on exit.  The function makes no calls.
//
// Register roles are given by the #defines below.  One invariant is not
// expressed by them: every path that reaches L34 (the common exit) must have
// the low 32 bits of out in %esi, because L34/L70 use %esi to recompute
// strm->avail_out.
//-----------------------------------------------------------------------------

	.cstring
LC0:
	.ascii "invalid distance too far back\0"
LC1:
	.ascii "invalid distance code\0"
LC2:
	.ascii "invalid literal/length code\0"

	.text
	.align 4,0x90

#ifdef INFLATE_STRICT
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
	.byte 0
#endif

.globl _inflate_fast
_inflate_fast:

	// set up rbp
	pushq	%rbp
	movq	%rsp, %rbp

	// save registers in stack
	pushq	%r15
	pushq	%r14
	pushq	%r13
	pushq	%r12
	pushq	%rbx

#define strm %r13
#define state %rdi
#define in %r12
#define in_d %r12d
#define out %r10
#define out_d %r10d
#define write %r15d
#define hold %r9
#define holdd %r9d
#define bits %r8d
#define lcode %r14
#define len %ebx
#define from %rcx
#define dmax %r11d

#define last -104(%rbp)
#define beg -96(%rbp)
#define end -88(%rbp)
#define wsize -80(%rbp)
#define whave -76(%rbp)
#define window -72(%rbp)
#define dcode -64(%rbp)
#define lmask -56(%rbp)
#define dmask -112(%rbp)
#define wsize_write -116(%rbp)
#define write_1 -128(%rbp)
#define dist -44(%rbp)

	// reserve stack memory for local variables 128-40=88
	subq	$88, %rsp

	movq	%rdi, strm
	movq	56(%rdi), state	// state = (struct inflate_state FAR *)strm->state;
	movq	(strm), in	// in = strm->next_in - OFF;
	movl	8(strm), %eax	// strm->avail_in
	subl	$5, %eax	// (strm->avail_in - 5)
	addq	in, %rax	// in + (strm->avail_in - 5)
	movq	%rax, last	// last = in + (strm->avail_in - 5)
	movq	24(strm), out	// out = strm->next_out
	movl	32(strm), %eax	// strm->avail_out
	subl	%eax, %esi	// (start - strm->avail_out); 32-bit op zero-extends into %rsi
	movq	out, %rdx	// strm->next_out
	subq	%rsi, %rdx	// out - (start - strm->avail_out);
	movq	%rdx, beg	// beg = out - (start - strm->avail_out);
	subl	$257, %eax	// (strm->avail_out - 257)
	addq	out, %rax	// out + (strm->avail_out - 257);
	movq	%rax, end	// end = out + (strm->avail_out - 257);

#ifdef INFLATE_STRICT
	movl	20(state), dmax	// dmax = state->dmax;
#endif

	movl	52(state), %ecx	// state->wsize
	movl	%ecx, wsize	// wsize = state->wsize;
	movl	56(state), %ebx	// state->whave;
	movl	%ebx, whave	// whave = state->whave;
	movl	60(state), write	// write = state->write;
	movq	64(state), %rax	// state->window
	movq	%rax, window	// window = state->window;
	movq	72(state), hold	// hold = state->hold;
	movl	80(state), bits	// bits = state->bits;

	movq	96(state), lcode	// lcode = state->lencode;
	movq	104(state), %rdx	// state->distcode;
	movq	%rdx, dcode	// dcode = state->distcode;

	movl	116(state), %ecx	// state->distbits
	movl	$1, %eax
	movl	%eax, %edx	// 1
	sall	%cl, %edx	// (1U << state->distbits)
	movl	112(state), %ecx	// state->lenbits
	sall	%cl, %eax	// (1U << state->lenbits)
	decl	%eax	// (1U << state->lenbits) - 1
	movq	%rax, lmask	// lmask = (1U << state->lenbits) - 1
	decl	%edx	// (1U << state->distbits) - 1
	movq	%rdx, dmask	// dmask = (1U << state->distbits) - 1

	movl	wsize, %ecx	// wsize
	addl	write, %ecx	// wsize + write
	movl	%ecx, wsize_write	// wsize_write = wsize + write

	// write_1 is stored here but is not read again anywhere in this function
	leal	-1(%r15), %ebx	// write - 1
	movq	%rbx, write_1	// write_1 = write - 1

L_do_while_loop:
	cmpl	$14, bits	// bits vs 14
	ja	0f	// if (bits < 15) {
	movzwl	(in), %eax	// read 2 bytes from in
	movl	bits, %ecx	// set up cl = bits
	salq	%cl, %rax	// (*in) << bits
	addq	%rax, hold	// hold += (*in) << bits
	addq	$2, in	// in += 2
	addl	$16, bits	// bits += 16
0:	// }
	movq	lmask, %rax	// lmask
	andq	hold, %rax	// hold & lmask
	jmp	1f

	.align 4,0x90
Lop_nonzero:
	movzbl	%al, %ecx	// op in al and cl
	testb	$16, %cl	// check for length base processing (op&16)
	jne	L_length_base	// if (op&16) branch to length base processing
	testb	$64, %cl	// check for 2nd level length code (op&64==0)
	jne	L_end_of_block	// if (op&64)!=0, branch for end-of-block processing

	/* 2nd level length code : (op&64) == 0*/
L_2nd_level_length_code:
	movl	$1, %eax	// 1
	sall	%cl, %eax	// 1 << op
	decl	%eax	// ((1U << op) - 1)
	andq	hold, %rax	// (hold & ((1U << op) - 1))
	movzwl	%dx, %edx
	addq	%rdx, %rax	// this = lcode[this.val + (hold & ((1U << op) - 1))];
1:
	movl	(lcode,%rax,4), %eax	// this = lcode[hold & lmask];
Ldolen:
	movl	%eax, %edx	// a copy of this
	shrl	$16, %edx	// edx = this.val;
	movzbl	%ah, %ecx	// op = this.bits
	shrq	%cl, hold	// hold >>= op;
	subl	%ecx, bits	// bits -= op;
	testb	%al, %al	// op = (unsigned)(this.op);
	jne	Lop_nonzero	// if (op != 0) branch for copy operation
L_literal:
	movb	%dl, (out)	// *out = this.val
	incq	out	// out ++
L_do_while_loop_check:
	cmpq	last, in	// in vs last
	jae	L_return_unused_byte	// if in >= last, break to return unused byte processing
	cmpq	end, out	// out vs end
	jb	L_do_while_loop	// back to do_while_loop if out < end

	/* return unused bytes (on entry, bits < 8, so in won't go too far back) */

L_return_unused_byte:
	movl	out_d, %esi	// L34 expects out in %esi
	jmp	L34

L_length_base:	/* al = cl = op, edx = this.val, op&16 = 16 */
	movzwl	%dx, len	// len = (unsigned)(this.val);
	movl	%ecx, %edx	// op
	andl	$15, %edx	// op &= 15;
	je	1f	// if (op) {
	cmpl	bits, %edx	// op vs bits
	jbe	0f	// if (bits < op) {
	movzbl	(in), %eax	// *in
	movl	bits, %ecx	// cl = bits
	salq	%cl, %rax	// *in << bits
	addq	%rax, hold	// hold += (unsigned long)(PUP(in)) << bits;
	incq	in	// in++
	addl	$8, bits	// bits += 8
0:	// }
	movl	$1, %eax	// 1
	movl	%edx, %ecx	// cl = op
	sall	%cl, %eax	// 1 << op
	decl	%eax	// (1 << op) - 1
	andl	holdd, %eax	// (unsigned)hold & ((1U << op) - 1);
	addl	%eax, len	// len += (unsigned)hold & ((1U << op) - 1);
	shrq	%cl, hold	// hold >>= op;
	subl	%edx, bits	// bits -= op;
1:	// }
	cmpl	$14, bits	// bits vs 14
	jbe	L99	// if (bits < 15) go to loading to hold and return to L19
L19:	// }
	movq	dmask, %rax	// dmask
	andq	hold, %rax	// hold & dmask
	movq	dcode, %rdx	// dcode[]
	movl	(%rdx,%rax,4), %eax	// this = dcode[hold & dmask];
	jmp	L_dodist

	.align 4,0x90
0:	// op&16 == 0, test (op&64)==0 for 2nd level distance code
	testb	$64, %cl	// op&64
	jne	L_invalid_distance_code	// if (op&64) != 0, invalid distance code; otherwise {
	/* 2nd level distance code */
	movl	$1, %eax	// 1
	sall	%cl, %eax	// 1 << op
	decl	%eax	// (1 << op) - 1
	andq	hold, %rax	// (hold & ((1U << op) - 1))
	movzwl	%dx, %edx	// this.val
	addq	%rdx, %rax	// this.val + (hold & ((1U << op) - 1))
	movq	dcode, %rcx	// dcode[]
	movl	(%rcx,%rax,4), %eax	// this = dcode[this.val + (hold & ((1U << op) - 1))];
L_dodist:
	movl	%eax, %edx	// this
	shrl	$16, %edx	// dist = (unsigned)(this.val);
	movzbl	%ah, %ecx	// cl = op = this.bits
	shrq	%cl, hold	// hold >>= op;
	subl	%ecx, bits	// bits -= op;
	movzbl	%al, %ecx	// op = (unsigned)(this.op);
	testb	$16, %cl	// (op & 16) test for distance base
	je	0b	// if (op&16) == 0, branch to check for 2nd level distance code

L_distance_base:	/* distance base */
	movl	%ecx, %esi	// op
	andl	$15, %esi	// op&=15
	cmpl	bits, %esi	// op vs bits
	jbe	1f	// if (bits < op) {
	movzbl	(in), %eax	// *in
	movl	bits, %ecx	// cl = bits
	salq	%cl, %rax	// *in << bits
	addq	%rax, hold	// hold += (unsigned long)(PUP(in)) << bits;
	incq	in	// in++
	addl	$8, bits	// bits += 8
	cmpl	bits, %esi	// op vs bits
	jbe	1f	// if (bits < op) {
	movzbl	(in), %eax	// *in
	movl	bits, %ecx	// cl = bits
	salq	%cl, %rax	// *in << bits
	addq	%rax, hold	// hold += (unsigned long)(PUP(in)) << bits;
	incq	in	// in++
	addl	$8, bits	// bits += 8
1:	// } }

	movzwl	%dx, %edx	// dist
	movl	$1, %eax	// 1
	movl	%esi, %ecx	// cl = op
	sall	%cl, %eax	// (1 << op)
	decl	%eax	// (1 << op) - 1
	andl	holdd, %eax	// (unsigned)hold & ((1U << op) - 1)
	addl	%edx, %eax	// dist += (unsigned)hold & ((1U << op) - 1);
	movl	%eax, dist	// save a copy of dist in stack

#ifdef INFLATE_STRICT
	// note: %esi still holds op here, not out; the error handler reloads it
	cmp	%eax, dmax	// dmax vs dist
	jb	L_invalid_distance_too_far_back	// if (dmax < dist) break for invalid distance too far back
#endif

	shrq	%cl, hold	// hold >>= op;
	subl	%esi, bits	// bits -= op;

	movl	out_d, %esi	// out
	movl	out_d, %eax	// out
	subl	beg, %eax	// op = out - beg
	cmpl	%eax, dist	// dist vs op, /* see if copy from window */
	jbe	L_copy_direct_from_output	// if (dist <= op) branch to copy direct from output

L_distance_back_in_window:
	movl	dist, %edx	// dist
	subl	%eax, %edx	// op = dist - op; /* distance back in window */
	cmpl	%edx, whave	// whave vs op
	jb	L_invalid_distance_too_far_back	// if (op > whave), break for invalid distance too far back

	testl	write, write	// if (write!=0)
	jne	L_wrap_around_window	// branch to wrap around window

L_very_common_case:
	movl	wsize, %eax	// wsize
	subl	%edx, %eax	// wsize - op
	movq	window, from	// from = window - OFF;
	addq	%rax, from	// from += wsize - op;
	movl	%edx, %esi	// op (loop counter for L_some_from_window)
	cmpl	%edx, len	// len vs op
	ja	L_some_from_window	// if (len > op), branch for aligned code block L_some_from_window

L38:
	subl	$3, len	// pre-decrement len by 3
	jge	0f	// if len >= 3, branch to the aligned code block
1:
	addl	$3, len	// post-increment len by 3
	je	L_do_while_loop_check	// if (len==0) break to L_do_while_loop_check
	movzbl	(from), %eax	// *from
	movb	%al, (out)	// *out
	incq	out	// out++
	cmpl	$2, len	// len vs 2
	jne	L_do_while_loop_check	// if len!=2 break to L_do_while_loop_check
	movzbl	1(from), %eax	// *from
	movb	%al, (out)	// *out
	incq	out	// out++
	jmp	L_do_while_loop_check	// break to L_do_while_loop_check

	.align 4,0x90
0:	// do {
	movzbl	(from), %eax	// *from
	movb	%al, (out)	// *out
	movzbl	1(from), %eax	// *from
	movb	%al, 1(out)	// *out
	movzbl	2(from), %eax	// *from
	movb	%al, 2(out)	// *out
	addq	$3, out	// out += 3
	addq	$3, from	// from += 3
	subl	$3, len	// len -= 3
	jge	0b	// } while (len>=0);
	jmp	1b	// branch back to the possibly unaligned code

	.align 4,0x90
L_end_of_block:
	andl	$32, %ecx	// op & 32
	jne	L101	// if (op&32) branch to end-of-block break
	leaq	LC2(%rip), from
	movq	from, 48(strm)	// strm message pointer = "invalid literal/length code"
	movl	$27, (state)	// state->mode = BAD;
	movl	out_d, %esi	// L34 expects out in %esi

L34:	// common exit; requires %esi = out_d
	movl	bits, %eax	// bits
	shrl	$3, %eax	// len = bits >> 3;
	mov	%eax, %edx	// len
	subq	%rdx, in	// in -= len
	sall	$3, %eax	// len << 3
	movl	bits, %ecx	// bits
	subl	%eax, %ecx	// bits -= len << 3

	movq	in, (strm)	// strm->next_in = in + OFF;
	movq	out, 24(strm)	// strm->next_out = out + OFF;

	cmpq	in, last	// last vs in
	jbe	L67	// if (last <= in) branch to L67 and return to L69
	movl	last, %eax	// last
	addl	$5, %eax	// last + 5
	subl	in_d, %eax	// 5 + last - in
L69:
	movl	%eax, 8(strm)	// update strm->avail_in

	cmpq	end, out	// out vs end
	jae	L70	// if out >= end branch to L70 and return to L72
	movl	end, %eax	// end
	addl	$257, %eax	// 257 + end
	subl	%esi, %eax	// 257 + end - out;
L72:
	movl	%eax, 32(strm)	// update strm->avail_out

	movl	$1, %eax	// 1
	sall	%cl, %eax	// 1 << bits
	decl	%eax	// (1U << bits) - 1
	andq	hold, %rax	// hold &= (1U << bits) - 1;
	movq	%rax, 72(state)	// state->hold = hold;
	movl	%ecx, 80(state)	// state->bits = bits;

	// clear stack memory for local variables
	addq	$88, %rsp

	// restore registers from stack
	popq	%rbx
	popq	%r12
	popq	%r13
	popq	%r14
	popq	%r15

	// return to caller
	leave
	ret

	.align 4,0x90
L99:
	leal	8(bits), %esi	// esi = bits+8
	movzbl	(in), %edx	// 1st *in
	movl	bits, %ecx	// cl = bits
	salq	%cl, %rdx	// 1st *in << bits
	addq	%rdx, hold	// 1st hold += (unsigned long)(PUP(in)) << bits;
	movzbl	1(in), %eax	// 2nd *in
	movl	%esi, %ecx	// cl = bits + 8
	salq	%cl, %rax	// 2nd *in << bits+8
	addq	%rax, hold	// 2nd hold += (unsigned long)(PUP(in)) << bits;
	addq	$2, in	// in += 2
	addl	$16, bits	// bits += 16
	jmp	L19

L101:
	movl	$11, (state)	// end-of-block: store 11 as the new state->mode
	movl	out_d, %esi	// L34 expects out in %esi
	jmp	L34

	.align 4,0x90
L70:
	movl	end, %eax	// end
	subl	%esi, %eax	// end - out
	addl	$257, %eax	// 257 + end - out
	jmp	L72

	.align 4,0x90
L67:
	movl	last, %eax	// last
	subl	in_d, %eax	// last - in
	addl	$5, %eax	// 5 + last - in
	jmp	L69

	.align 4,0x90

	// stuffing the following 4 bytes to align the major loop to a 16-byte boundary to give the better performance
	.byte 0
	.byte 0
	.byte 0
	.byte 0
L_copy_direct_from_output:
	mov	dist, %eax	// dist
	movq	out, %rdx	// out
	subq	%rax, %rdx	// from = out - dist;
	subl	$3, len	// pre-decrement len by 3
	// do {
0:
	movzbl	(%rdx), %eax	// *from
	movb	%al, (out)	// *out
	movzbl	1(%rdx), %eax	// *from
	movb	%al, 1(out)	// *out
	movzbl	2(%rdx), %eax	// *from
	movb	%al, 2(out)	// *out
	addq	$3, out	// out+=3
	addq	$3, %rdx	// from+=3
	subl	$3, len	// len-=3
	jge	0b	// } while (len>=0);
1:
	addl	$3, len	// post-increment len by 3
	je	L_do_while_loop_check	// if len==0, branch to do_while_loop_check
	movzbl	(%rdx), %eax	// *from
	movb	%al, (out)	// *out
	incq	out	// out++
	cmpl	$2, len	// len == 2 ?
	jne	L_do_while_loop_check	// if len==1, branch to do_while_loop_check
	movzbl	1(%rdx), %eax	// *from
	movb	%al, (out)	// *out
	incq	out	// out++
	jmp	L_do_while_loop_check	// branch to do_while_loop_check

	.align 4,0x90
L_some_from_window:	// from : from, out, %esi/%edx = op
	// do {
	movzbl	(from), %eax	// *from
	movb	%al, (out)	// *out
	incq	from	// from++
	incq	out	// out++
	decl	%esi	// --op
	jne	L_some_from_window	// } while (op);
	subl	%edx, len	// len -= op;
	mov	dist, %eax	// dist
	movq	out, from	// out
	subq	%rax, from	// from = out - dist;
	jmp	L38	// copy from output

	.align 4,0x90
L_wrap_around_window:	// reached when write != 0; %edx = op
	cmpl	%edx, write	// write vs op
	jae	L_contiguous_in_window	// if (write >= op) branch to contiguous in window
	movl	wsize_write, %eax	// wsize+write
	subl	%edx, %eax	// wsize+write-op
	movq	window, from	// from = window - OFF
	addq	%rax, from	// from += wsize+write-op
	subl	write, %edx	// op -= write
	cmpl	%edx, len	// len vs op
	jbe	L38	// if (len<=op) branch to copy from output

	subl	%edx, len	// len -= op;
0:	// do {
	movzbl	(from), %eax	// *from
	movb	%al, (out)	// *out
	incq	from	// from++
	incq	out	// out++
	decl	%edx	// op--
	jne	0b	// } while (op);

	movq	window, from
	cmpl	len, write	// write vs len
	jae	L38	// if (write >= len) branch to copy from output
	movl	write, %esi	// op = write
	subl	write, len	// len -= op
1:	// do {
	movzbl	(from), %eax	// *from
	movb	%al, (out)	// *out
	incq	from	// from++
	incq	out	// out++
	decl	%esi	// op--
	jne	1b	// } while (op);

	mov	dist, %eax	// dist
	movq	out, from	// out
	subq	%rax, from	// from = out - dist;
	jmp	L38

	.align 4,0x90
L_contiguous_in_window:
	movl	write, %eax	// write
	subl	%edx, %eax	// write - op
	movq	window, from	// from = window - OFF
	addq	%rax, from	// from += write - op
	cmpl	%edx, len	// len vs op
	jbe	L38	// if (len <= op) branch to copy from output
	subl	%edx, len	// len -= op;
2:	// do {
	movzbl	(from), %eax	// *from
	movb	%al, (out)	// *out
	incq	from	// from++
	incq	out	// out++
	decl	%edx	// op--
	jne	2b	// } while (op);

	mov	dist, %eax	// dist
	movq	out, from	// out
	subq	%rax, from	// from = out - dist;
	jmp	L38	// copy from output

	.align 4,0x90
L_invalid_distance_code:
	leaq	LC1(%rip), %rdx
	movq	%rdx, 48(strm)	// error message
	movl	$27, (state)	// state->mode = BAD
	movl	out_d, %esi	// L34 expects out in %esi
	jmp	L34

L_invalid_distance_too_far_back:
	leaq	LC0(%rip), %rbx	// %rbx (len) is free to clobber, we are leaving
	movq	%rbx, 48(strm)	// error message
	movl	$27, (state)	// state->mode = BAD
	// Reload %esi: when we arrive from the INFLATE_STRICT dmax check, %esi
	// still holds op, and L34 would otherwise store a wrong strm->avail_out.
	// (From the whave check %esi already equals out_d, so this is a no-op there.)
	movl	out_d, %esi
	jmp	L34

#endif