#include // the follow assembly code was hard wired to POSTINC not defined, #if 0 // #ifdef POSTINC # define OFF 0 # define PUP(a) *(a)++ #else # define OFF 1 # define PUP(a) *++(a) #endif // the code uses r9, therefore, it does not meet the register protocol for armv5 and below // the code can only be used for armv6 and above #if defined _ARM_ARCH_6 .cstring .align 2 LC0: .ascii "invalid distance too far back\0" .align 2 LC1: .ascii "invalid distance code\0" .align 2 LC2: .ascii "invalid literal/length code\0" // renaming the register and stack memory use #define out r0 #define strm r10 #define state r5 #define in r11 #define write r9 #define distcode r8 #define bits lr #define hold r4 // stack memory allocation #define window_loc [sp,#0] #define last_loc [sp,#4] #define beg_loc [sp,#8] #define end_loc [sp,#12] #define wsize_loc [sp,#16] #define whave_loc [sp,#20] #define windowm1_loc [sp,#28] #define lmask_loc [sp,#32] #define dmask_loc [sp,#36] #define op_loc [sp,#44] #define dist_loc [sp,#48] #define local_size 52 // the following defines the variable offset in the inflate_state structure (in inflate.h) #define state_mode [state, #0] #define state_last [state, #4] #define state_wrap [state, #8] #define state_havedict [state, #12] #define state_flags [state, #16] #define state_dmax [state, #20] #define state_wbits [state, #36] #define state_wsize [state, #40] #define state_whave [state, #44] #define state_write [state, #48] #define state_window [state, #52] #define state_hold [state, #56] #define state_bits [state, #60] #define state_lencode [state, #76] #define state_distcode [state, #80] #define state_lenbits [state, #84] #define state_distbits [state, #88] // void inflate_fast(z_streamp strm, unsigned start) // input : // r0 = strm, (move to r10) // r1 = start .text .align 2 .globl _inflate_fast _inflate_fast: stmfd sp!, {r4-r6,r8-r11,lr} sub sp, sp, #local_size #if defined(_ARM_ARCH_5) ldrd r2,r3,[r0, #0] // r2 = strm->next_in, r3 = strm->avail_in #else ldmia r0, {r2-r3} #endif sub in, r2, #OFF // in = strm->next_in - OFF; sub r2, #(OFF+5) // next_in -= (OFF+5); ldr state, [r0, #28] // state = (struct inflate_state FAR *)strm->state; add r3, r3, r2 // last = next_in - OFF + (avail_in - 5); next_in already updated mov strm, r0 str r3, last_loc // store last to release r3 ldr r3, [r0, #12] // next_out ldr r2, [strm, #16] // avail_out sub out, r3, #OFF // out = strm->next_out - OFF; r0 is used as out from this point on sub r3, r3, #256 // next_out - 256 rsb r1, r2, r1 // start - avail_out sub r3, r3, #(1+OFF) // next_out-OFF-257 add r3, r3, r2 // r3 = end = avail_out + (next_out-OFF) - 257 = avail_out + out - 257 rsb r2, r1, out // r2 = beg = out - (start - avail_out); #if defined(_ARM_ARCH_5) strd r2,r3, beg_loc // store beg/end ldrd r2,r3, state_wsize // wsize/whave strd r2,r3, wsize_loc // store wsize/whave //ldrd r6,hold, state_window // window/hold, hold use r7 ldr r6, state_window // state->window ldr hold, state_hold // state->hold nop #else // for architecture < armv5, ldrd/strd is not available str r2, beg_loc // store beg str r3, end_loc // store end ldr r2, state_wsize // state->wsize ldr r3, state_whave // state->whave str r2, wsize_loc // store wsize str r3, whave_loc // store whave ldr r6, state_window // state->window ldr hold, state_hold // state->hold #endif ldr ip, state_lencode // lencode mov r3, #1 // used to derive lmask and dmask ldr write, state_write // write (r9 from this point on) : window write index nop str ip, [sp, #40] // save lencode sub ip, r6, #1 // window-1 str r6, window_loc // store window str ip, windowm1_loc // store window-1 ldr r2, state_lenbits // lenbits ldr bits, state_bits // bits, use lr from this point on ldr distcode, state_distcode// distcode, use r8 mov r2, r3, asl r2 // (1<lenbits) - 1; mov r3, r3, asl r12 // (1U << state->distbits) sub r3, r3, #1 // dmask = (1U << state->distbits) - 1; #if defined(_ARM_ARCH_5) strd r2, r3, lmask_loc // store lmask/dmask #else str r2, lmask_loc // lmask str r3, dmask_loc // dmask #endif // start the do loop decoding literals and length/distances // until end-of-block or not enough input data or output spaaaaaace do_loop: cmp bits, #15 // bits vs 15 ldr r1, lmask_loc // lmask bge bitsge15 // if bits >= 15, skip loading new 16 bits // this is a shortcut with the processor reads data in little-endian mode ldrh r3, [in,#1] // read 2 bytes add in, #2 // in pointer += 2 add hold, hold, r3, asl bits // deposit the new 2 bytes into hold add bits, #16 // bits count += 16 bitsge15: ldr ip, [sp, #40] // restore lencode and r3, hold, r1 // r3 = hold & lmask b dolen op_not_zero: tst r2, #16 // if (op&16) bne length_base // branch to length_base tst r2, #64 // else if (op&64) bne end_of_block // branch to end_of_block processing // 2nd-level length code, this is the part where if ((op & 64) == 0) { ... } // this.val + (hold & ((1U << op) - 1)); // r3 = r1 + hold & ((1< 8-bit code, 8-bit bits, 16-bit val ldrb r2, [ip,r3,asl #2] // op = (unsigned)(this.bits); add r3, ip, r3, asl #2 // r3 = this ldrb ip, [r3, #1] // ip = this.bits ldrh r1, [r3, #2] // r1 = this.value cmp r2, #0 // op == 0 ? mov hold, hold, lsr ip // hold >>= this.bits rsb bits, ip, bits // bits -= this.bits bne op_not_zero // branch to op_not_zero if this.op != 0 strb r1, [out, #1]! // PUP(out) = (unsigned char)(this.val); do_loop_while: ldr r1, last_loc // last ldr r2, end_loc // end cmp in, r1 // compare in vs last cmpcc out, r2 // if in < last, compare out vs end bcc do_loop // if (in < last && out < end) go back to do_loop update_state_and_return: sub r2, in, bits, lsr #3 // r2 = in - (bits>>3) add r3, r2, #OFF // r3 = (in - (bits>>3)) + OFF str r3, [strm, #0] // strm->next_in = in + OFF; add r3, out, #OFF // out + OFF str r3, [strm, #12] // strm->next_out = out + OFF; ldr r3, last_loc // r3 = last ldr ip, end_loc // ip = end cmp r3, r2 // compare last vs in addhi r3, r3, #5 // if last > in, last +=5 movls r6, r3 // o.w., r6 = last rsbls r3, r6, r2 // r3 = in-last rsbhi r3, r2, r3 // r3 = (last+5) - in rsbls r3, r3, #5 // r3 = 5 - (in-last); cmp out, ip // compare out vs end str r3, [strm, #4] // strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); movcs r2, ip // if out=end, r3 = end+256 rsbcs r3, r2, out // if out=end, r3 = end+257 rsbcs r3, r3, #256 // if out>3) << 3; rsbcc r3, out, r3 // if out=end, r3 = 257 + (end-out) str r3, [strm, #16] // strm->avail_out = (unsigned)(out < end ? 257 + (end - out) : 257 - (out - end)); // hold &= (1U << bits) - 1; rsb ip, bits, #32 // 32-bits ror hold, hold, bits // this is equivalent to hold<<(32-bits) lsr hold, hold, ip // logical shift right by (32-bits), hold now only keeps the bits LSBs str bits, state_bits // state->bits = bits; str hold, state_hold // state->hold = hold; add sp, #local_size // pop out stack memory ldmfd sp!,{r4-r6,r8-r11,pc} // restore registers and return length_base: // r2=op, r1=lmask ands r2, r2, #15 // op&=15; mov r6, r1 // len = (unsigned) this.val; beq op_is_zero // if op==0, branch to op_is_zero cmp r2, bits // op vs bits ldrbhi r3, [in, #1]! // if (op>bits) r3 = (PUP(in)); addhi hold, hold, r3, asl bits // if (op>bits) hold += (unsigned long)(PUP(in)) << bits; rsb ip, r2, #32 // 32-op ror r3, hold, r2 // (hold<<(32-op)) add r6, r1, r3, lsr ip // len += (unsigned)hold & ((1U << op) - 1); addhi bits, bits, #8 // if (op>bits) bits += 8; mov hold, hold, lsr r2 // hold >>= op; rsb bits, r2, bits // bits -= op; op_is_zero: cmp bits, #14 ldrh r3,[in,#1] // if (bits < 15) { 2 (PUP(in)); no condition code for better performance addls in, #2 // in+=2; addls hold, hold, r3, asl bits // twice hold += (unsigned long)(PUP(in)) << bits; addls bits, #16 // 2 bits += 8; } dodist: ldr r2, dmask_loc // r2 = dmask and r3, hold, r2 // r3 = hold & dmask mov r2, r3, asl #2 add r3, r2, distcode // &dcode[hold&dmask]; ldrb ip, [r2, distcode] // op ldrh r1, [r3, #2] // dist = (unsigned)(this.val); tst ip, #16 // op vs 16 ldrb r3, [r3, #1] // this.bits mov hold, hold, lsr r3 // hold >>= this.bits; rsb bits, r3, bits // bits -= this.bits; bne distance_base // if (op&16) { distance base processing } tst ip, #64 // beq second_distance_code // else if ((op&64)==0) branch to 2nd level distance code b invalide_distance_code check_2nd_level_distance_code: tst r2, #64 // check for esle if ((op & 64) == 0) for 2nd level distance code bne invalide_distance_code second_distance_code: rsb r2, ip, #32 // 32-op ror r3, hold, ip // hold<<(32-op) add r3, r1, r3, lsr r2 // this.val + (hold & ((1U << op) - 1)) mov r2, r3, asl #2 add r3, r2, distcode // this = dcode[this.val + (hold & ((1U << op) - 1))]; ldrb r2, [r2, distcode] // this.op ldrh r1, [r3, #2] // this.val tst r2, #16 // op&16 ldrb r3, [r3, #1] // this.bits mov ip, r2 // op mov hold, hold, lsr r3 // hold >> = this.bits rsb bits, r3, bits // bits -= this.bits beq check_2nd_level_distance_code distance_base: // this is invoked from if ((op&16)!=0) and r2, ip, #15 // op &= 15; cmp r2, bits // op vs bits ldrbhi r3, [in, #1]! // if (op > bits) (PUP(in)) addhi hold, hold, r3, asl bits // hold += (unsigned long)(PUP(in)) << bits; addhi bits, bits, #8 // bits += 8; cmphi r2, bits // internel (bits < op) ldrbhi r3, [in, #1]! // if (op > bits) (PUP(in)) addhi hold, hold, r3, asl bits // hold += (unsigned long)(PUP(in)) << bits; addhi bits, bits, #8 // bits += 8 rsb ip, r2, #32 // (32-op) ror r3, hold, r2 // hold<<(32-op) add r3, r1, r3, lsr ip // dist += (unsigned)hold & ((1U << op) - 1); str r3, dist_loc // save dist #ifdef INFLATE_STRICT ldr r1, state_dmax // r1 = dmax cmp r3, r1 // dist vs dmax bgt invalid_distance_too_far_back // if dist > dmax, set up msg/mode = bad and break #endif mov hold, hold, lsr r2 // hold >>= op ; rsb bits, r2, bits // bits -= op; ldr ip, beg_loc // beg ldr r1, dist_loc // dist rsb r3, ip, out // (out - beg); cmp r1, r3 // dist vs (out - beg) rsbls r2, r1, out // if (dist<=op) r2 = from = out-dist bls copy_direct_from_output // if (dist<=op) branch to copy_direct_from_output ldr r2, whave_loc // whave rsb r1, r3, r1 // op = dist-op cmp r2, r1 // whave vs op str r1, op_loc // save a copy of op bcc invalid_distance_too_far_back // if whave < op, message invalid distance too far back, and break cmp write, #0 // write bne non_very_common_case // if (write ==0) non_very_common_case // the following : if (write == 0) { /* very common case */ } ldr r1, op_loc // restore op in r1 ldr ip, wsize_loc // wsize cmp r6, r1 // len vs op rsb r3, r1, ip // wsize - op ldr ip, windowm1_loc // window - 1 add r2, ip, r3 // from = window - 1 + wsize - op : setup for using PUP(from) //movhi r3, r1 // if len > op, r3 = op //movhi r1, out // if len > op, r1 = out bhi some_from_window // if (len > op), branch to some_from_window finish_copy: // while (len > 2) { // PUP(out) = PUP(from); // PUP(out) = PUP(from); // PUP(out) = PUP(from); // len -= 3; // } // if (len) { // PUP(out) = PUP(from); // if (len > 1) // PUP(out) = PUP(from); // } cmp r6, #2 // len > 2 ? movls r1, r6 // if (len<=2) r1 = len bls lenle2 // if (len<=2) branch to lenle2 mov r1, r6 fcopy_per3bytes: ldrb r3, [r2, #1] // 1st PUP(from) sub r1, r1, #3 // len-=3 cmp r1, #2 // len > 2 ? strb r3, [out, #1] // 1st PUP(out) = PUP(from); ldrb r3, [r2, #2] // 2nd PUP(from) add r2, r2, #3 // from+=3 strb r3, [out, #2] // 2nd PUP(out) = PUP(from); ldrb r3, [r2, #0] // 3rd PUP(from) add out, out, #3 // out+=3 strb r3, [out, #0] // 3rd PUP(out) = PUP(from); bgt fcopy_per3bytes // while (len>3) back to loop head lenle2: cmp r1, #0 // len beq do_loop_while // back to while loop head if len==0 ldrb r3, [r2, #1] // PUP(from) cmp r1, #2 // check whether len==2 strb r3, [out, #1]! // PUP(out) = PUP(from); bne do_loop_while // back to while loop head if len==1 ldrb r3, [r2, #2] // 2nd PUP(from) strb r3, [out, #1]! // 2nd PUP(out) = PUP(from); b do_loop_while // back to while loop head end_of_block: tst r2, #32 // if (op&32) movne r3, #11 // TYPE? strne r3, state_mode // state-mode = TYPE bne update_state_and_return // break the do loop and branch to get ready to return ldr r3, messages // "invalid literal/length code" message L75: add r3, pc, r3 str r3, [strm, #24] // strm->msg = (char *)"invalid literal/length code"; mov r3, #27 // BAD? str r3, state_mode // state->mode = BAD; b update_state_and_return // break the do loop and branch to get ready to return //Read_2_bytes: // ldrh r3,[in,#1] // 2 (PUP(in)) together // add in, #2 // 2 in++ // add hold, hold, r3, asl bits // twice hold += (unsigned long)(PUP(in)) << bits; // add bits, #16 // 2 bits += 8; // b dodist // branch to dodist nop // a pad dummy instruction to give better performance copy_direct_from_output: // r2 = from = out - dist ; // do { ldrb r3, [r2, #1] // 1st PUP(from) sub r6, r6, #3 // len-=3 cmp r6, #2 // len vs 2 strb r3, [out, #1] // 1st PUP(out) = PUP(from); ldrb r3, [r2, #2] // 2nd PUP(from) add r2, r2, #3 // update from+=3 strb r3, [out, #2] // 2nd PUP(out) = PUP(from); ldrb r3, [r2, #0] // 3rd PUP(from); add out, out, #3 // update out+=3 strb r3, [out, #0] // 3rd PUP(out) = PUP(from); bhi copy_direct_from_output // while (len>2); // len in r6 can now be 0 1 or 2 subs r6,#1 // len--; ldrb r3, [r2, #1] // PUP(from) blt do_loop_while // if len<0 back to while loop head strb r3, [out, #1]! // PUP(out) = PUP(from); subs r6, #1 // len--; ldrb r3, [r2, #2] // 2nd PUP(from) blt do_loop_while // if len<0 back to while loop head strb r3, [out, #1]! // 2nd PUP(out) = PUP(from); b do_loop_while // back to while loop head invalide_distance_code: ldr r3, messages+4 // "invalid distance code" L72: add r3, pc, r3 str r3, [strm, #24] // strm->msg = (char *)"invalid distance code"; mov r3, #27 str r3, state_mode // state->mode = BAD; b update_state_and_return // break, restore registers, and return some_from_window: ldr r3, dist_loc // dist rsb r6, r1, r6 // len -= op some_from_window_loop: // do { ldrb ip, [r2, #1]! // PUP(from); subs r1, #1 // --op strb ip, [out, #1]! // PUP(out) = PUP(from); bne some_from_window_loop // } while(op); rsb r2, r3, out // from = out - dist; b finish_copy non_very_common_case: ldr r1, op_loc // restore op in r1 cmp write, r1 // write vs op bcs contiguous_in_window // if (write >= op) branch to contiguous_in_window /* wrap around window */ ldr r2, wsize_loc // wsize ldr ip, windowm1_loc // window-1 add r3, write, r2 // r3 = wsize+write rsb r3, r1, r3 // r3 = wsize+write-op add r2, ip, r3 // r2 = from = wsize+write-op+window-1; rsb r1, write, r1 // op -= write; cmp r6, r1 // len vs op bls finish_copy // if (len <= op) branch to finish_copy rsb r6, r1, r6 // len -= op waw_loop: // do { ldrb r3, [r2, #1]! // PUP(from) subs r1, r1, #1 // --op; strb r3, [out, #1]! // PUP(out) = PUP(from); bne waw_loop // } while (op); cmp write, r6 // write vs len ldr r2, windowm1_loc // if (write>=len) r2 = from = window-1; bcs finish_copy // if (write>=len) branch to finish_copy // some from start of window mov r1, write // op = write sub r6, write // len -= op sow_loop: // do { ldrb r3,[r2, #1]! // PUP(from) subs r1, #1 // --op; strb r3, [out,#1]! // PUP(out) = PUP(from); bne sow_loop // } while (op); ldr r2, dist_loc // dist rsb r2, r2, out // r2 = from = out-dist b finish_copy // continue to finish_copy contiguous_in_window: ldr ip, windowm1_loc // window-1 cmp r6, r1 // len vs op rsb r3, r1, write // r3 = write-op add r2, ip, r3 // r2 = from = window+write-op-1 bls finish_copy // if (len <= op) branch to finish_copy rsb r6, r1, r6 // len -= op ldr r3, dist_loc // dist ciw_loop: ldrb ip, [r2, #1]! // PUP(from) subs r1, r1, #1 // op-- strb ip, [out, #1]! // PUP(out) = PUP(from); bne ciw_loop // while (--op); rsb r2, r3, out // from = out - dist; b finish_copy invalid_distance_too_far_back: ldr r3, messages+8 // "invalid distance too far back" L42: add r3, pc, r3 str r3, [strm, #24] // strm->msg = (char *)"invalid distance too far back"; mov r3, #27 str r3, state_mode // state->mode = BAD; b update_state_and_return // break, restore registers, and return .align 2 messages: .long LC2-8-(L75) .long LC1-8-(L72) .long LC0-8-(L42) #endif // defined _ARM_ARCH_6