;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
; *
; * inffas32.asm is derived from inffas86.c, with translation of assembly code
; *
; * Copyright (C) 1995-2003 Mark Adler
; * For conditions of distribution and use, see copyright notice in zlib.h
; *
; * Copyright (C) 2003 Chris Anderson <christop@charm.net>
; * Please use the copyright conditions above.
; *
; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
; * the gcc -S output of zlib-1.2.0/inffast.c.  Zlib-1.2.0 is in beta release at
; * the moment.  I have successfully compiled and tested this code with gcc2.96,
; * gcc3.2, icc5.0, msvc6.0.  It is very close to the speed of inffast.S
; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
; * enabled.  I will attempt to merge the MMX code into this version.  Newer
; * versions of this and inffast.S can be found at
; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
; *
; * 2005 : modification by Gilles Vollant
; */
; For Visual C++ 4.x and higher and ML 6.x and higher
;   ml.exe is in directory \MASM611C of Win95 DDK
;   ml.exe is also distributed in http://www.masm32.com/masmdl.htm
;    and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
;
;
;   compile with command line option
;   ml  /coff /Zi /c /Flinffas32.lst inffas32.asm

;   if you define NO_GZIP (see inflate.h), compile with
;   ml  /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm


; zlib1222sup is 0 for zlib 1.2.2.1 and lower
; zlib1222sup is 8 for zlib 1.2.2.2 and more (with addition of dmax and head
;        in inflate_state in inflate.h)
; Extra byte count added to every inflate_state field offset that is built
; from it further down in this file (wsize_state ... distbits_state).
zlib1222sup      equ    8

; Numeric values written into state->mode when the decoder leaves the fast
; loop (INFLATE_MODE_TYPE at end of block, INFLATE_MODE_BAD on a data error).
; The values are 11/26 when GUNZIP is defined or NO_GUNZIP is not defined,
; and 3/17 only when NO_GUNZIP is defined without GUNZIP.
; NOTE(review): these presumably mirror the inflate_mode enum in inflate.h
; for the zlib version being linked -- confirm when updating zlib.
IFDEF GUNZIP
  INFLATE_MODE_TYPE    equ 11
  INFLATE_MODE_BAD     equ 26
ELSE
  IFNDEF NO_GUNZIP
    INFLATE_MODE_TYPE    equ 11
    INFLATE_MODE_BAD     equ 26
  ELSE
    INFLATE_MODE_TYPE    equ 3
    INFLATE_MODE_BAD     equ 17
  ENDIF
ENDIF

; 75 "inffast.S"
;FILE "inffast.S"

;;;GLOBAL _inflate_fast

;;;SECTION .text

; Assembler setup: Pentium-class instruction set (cpuid is used below),
; MMX instructions enabled, 32-bit flat memory model.
	.586p
	.mmx

	name	inflate_fast_x86
	.MODEL	FLAT

_DATA			segment
; Selects which decode loop _inflate_fast runs:
;   below 2 (initial value 1) : not decided yet, CPU detection runs on entry
;   2                         : use the MMX loop
;   above 2 (detection stores 3) : use the plain integer loop
inflate_fast_use_mmx:
	dd	1
_DATA			ends		; close the data segment before _TEXT is opened

_TEXT			segment

; Zero-terminated strings kept in the code segment.  The three labelled
; messages are the ones whose address is stored into the stream structure
; (offset 24) by the error exits of _inflate_fast.

ALIGN 4
	db	'Fast decoding Code from Chris Anderson'
	db	0

ALIGN 4
invalid_literal_length_code_msg:
	db	'invalid literal/length code'
	db	0

ALIGN 4
invalid_distance_code_msg:
	db	'invalid distance code'
	db	0

ALIGN 4
invalid_distance_too_far_msg:
	db	'invalid distance too far back'
	db	0

ALIGN 4
; inflate_fast_mask[n] = (2^n - 1) for n = 0..32: a dword with the low n bits
; set.  The MMX path indexes it with the extra-bit count to mask the hold
; value (and ecx, [inflate_fast_mask+eax*4]).
inflate_fast_mask:
	dd	0, 1, 3, 7					; n =  0.. 3
	dd	15, 31, 63, 127					; n =  4.. 7
	dd	255, 511, 1023, 2047				; n =  8..11
	dd	4095, 8191, 16383, 32767			; n = 12..15
	dd	65535, 131071, 262143, 524287			; n = 16..19
	dd	1048575, 2097151, 4194303, 8388607		; n = 20..23
	dd	16777215, 33554431, 67108863, 134217727		; n = 24..27
	dd	268435455, 536870911, 1073741823, 2147483647	; n = 28..31
	dd	4294967295					; n = 32

; Byte offsets of the inflate_state fields this file touches, relative to the
; state pointer.  All but mode_state are shifted by zlib1222sup.
; NOTE(review): these must match struct inflate_state in inflate.h for the
; zlib version being linked -- confirm when updating zlib.
mode_state	 equ	0	;/* state->mode	*/
wsize_state	 equ	(32+zlib1222sup)	;/* state->wsize */
write_state	 equ	(36+4+zlib1222sup)	;/* state->write */
window_state	 equ	(40+4+zlib1222sup)	;/* state->window */
hold_state	 equ	(44+4+zlib1222sup)	;/* state->hold	*/
bits_state	 equ	(48+4+zlib1222sup)	;/* state->bits	*/
lencode_state	 equ	(64+4+zlib1222sup)	;/* state->lencode */
distcode_state	 equ	(68+4+zlib1222sup)	;/* state->distcode */
lenbits_state	 equ	(72+4+zlib1222sup)	;/* state->lenbits */
distbits_state	 equ	(76+4+zlib1222sup)	;/* state->distbits */

;;SECTION .text
; 205 "inffast.S"
;GLOBAL	inflate_fast_use_mmx

;SECTION .data


; GLOBAL inflate_fast_use_mmx:object
;.size inflate_fast_use_mmx, 4
; 226 "inffast.S"
;SECTION .text

ALIGN 4
;-----------------------------------------------------------------------
; _inflate_fast -- hand-tuned 32-bit x86 version of inflate_fast() from
; inffast.c (see the file header).
;
; Arguments (on the stack; the procedure returns with a plain `ret`):
;   arg 1 = pointer to the stream structure   -> [esp+88] after the prologue
;   arg 2 = "start" value                     -> [esp+92] after the prologue
;   (88 = 64 bytes of locals + pushfd + 4 saved registers + return address)
;
; Stream structure fields used, by byte offset.  Field names follow
; z_stream in zlib.h (32-bit layout) -- confirm against the zlib in use:
;   +0 next_in   +4 avail_in   +12 next_out   +16 avail_out
;   +24 msg      +28 state
;
; Local frame built here (64 bytes):
;   [esp+0]  lmask  = (1 << lenbits) - 1
;   [esp+4]  dmask  = (1 << distbits) - 1
;   [esp+8]  lcode  = state->lencode
;   [esp+12] dcode  = state->distcode
;   [esp+16] end    = next_out + avail_out - 257
;   [esp+20] last   = next_in + avail_in - 11
;   [esp+24] len    (current match length, written by the decode loop)
;   [esp+28] 12-byte scratch buffer for short input ([esp+28]..[esp+39])
;   [esp+40] beg    = next_out - (start - avail_out)
;   [esp+44] in     (saved input pointer)
;   [esp+48] write  = state->write
;   [esp+52] wsize  = state->wsize
;   [esp+56] window = state->window
;   [esp+60] out    = next_out
;
; Preserved for the caller: edi, esi, ebp, ebx and EFLAGS (pushed here,
; popped at L_done).
;-----------------------------------------------------------------------
_inflate_fast proc near
.FPO (16, 4, 0, 0, 1, 0)
	push  edi
	push  esi
	push  ebp
	push  ebx
	pushfd				; saved so the direction flag is restored on exit
	sub  esp,64			; local frame, see map above
	cld				; string instructions must move forward

	mov  esi, [esp+88]		; esi = stream pointer (arg 1)
	mov  edi, [esi+28]		; edi = state pointer

	mov  edx, [esi+4]		; edx = avail_in
	mov  eax, [esi+0]		; eax = next_in

	add  edx,eax
	sub  edx,11			; edx = next_in + avail_in - 11

	mov  [esp+44],eax		; in
	mov  [esp+20],edx		; last

	mov  ebp, [esp+92]		; ebp = start (arg 2)
	mov  ecx, [esi+16]		; ecx = avail_out
	mov  ebx, [esi+12]		; ebx = next_out

	sub  ebp,ecx
	neg  ebp
	add  ebp,ebx			; ebp = next_out - (start - avail_out)

	sub  ecx,257
	add  ecx,ebx			; ecx = next_out + avail_out - 257

	mov  [esp+60],ebx		; out
	mov  [esp+40],ebp		; beg
	mov  [esp+16],ecx		; end
; 285 "inffast.S"
	mov  eax, [edi+lencode_state]
	mov  ecx, [edi+distcode_state]

	mov  [esp+8],eax		; lcode
	mov  [esp+12],ecx		; dcode

	mov  eax,1
	mov  ecx, [edi+lenbits_state]
	shl  eax,cl
	dec  eax
	mov  [esp+0],eax		; lmask = (1 << lenbits) - 1

	mov  eax,1
	mov  ecx, [edi+distbits_state]
	shl  eax,cl
	dec  eax
	mov  [esp+4],eax		; dmask = (1 << distbits) - 1

	mov  eax, [edi+wsize_state]
	mov  ecx, [edi+write_state]
	mov  edx, [edi+window_state]

	mov  [esp+52],eax		; wsize
	mov  [esp+48],ecx		; write
	mov  [esp+56],edx		; window

	mov  ebp, [edi+hold_state]	; ebp = bit accumulator (hold)
	mov  ebx, [edi+bits_state]	; ebx = number of valid bits in hold
; 321 "inffast.S"
236251662Sdim	mov  esi, [esp+44]
237239310Sdim	mov  ecx, [esp+20]
238239310Sdim	cmp  ecx,esi
239239310Sdim	ja   L_align_long
240239310Sdim
241239310Sdim	add  ecx,11
242239310Sdim	sub  ecx,esi
243239310Sdim	mov  eax,12
244239310Sdim	sub  eax,ecx
245239310Sdim	lea  edi, [esp+28]
246239310Sdim	rep movsb
247239310Sdim	mov  ecx,eax
248239310Sdim	xor  eax,eax
249239310Sdim	rep stosb
250239310Sdim	lea  esi, [esp+28]
251239310Sdim	mov  [esp+20],esi
252239310Sdim	jmp  L_is_aligned
253239310Sdim
254239310Sdim
255239310SdimL_align_long:
256239310Sdim	test  esi,3
257239310Sdim	jz   L_is_aligned
258239310Sdim	xor  eax,eax
259239310Sdim	mov  al, [esi]
260239310Sdim	inc  esi
261239310Sdim	mov  ecx,ebx
262239310Sdim	add  ebx,8
263239310Sdim	shl  eax,cl
264239310Sdim	or  ebp,eax
265239310Sdim	jmp L_align_long
266239310Sdim
267239310SdimL_is_aligned:
268239310Sdim	mov  edi, [esp+60]
269239310Sdim; 366 "inffast.S"
; Dispatch on inflate_fast_use_mmx: 2 -> MMX loop, above 2 -> integer loop,
; below 2 -> run the one-time CPU detection that follows, which stores 2 or 3
; and then jumps back here.
L_check_mmx:
	cmp  dword ptr [inflate_fast_use_mmx],2
	je   L_init_mmx
	ja   L_do_loop

	; cpuid overwrites eax/ebx/ecx/edx, all of which are live here.
	push  eax
	push  ebx
	push  ecx
	push  edx
	; CPUID is available only if EFLAGS bit 21 (ID, mask 0200000h) can be
	; toggled.
	pushfd
	mov  eax, [esp]			; eax = original EFLAGS
	xor  dword ptr [esp],0200000h
	popfd				; try to load EFLAGS with the bit flipped
	pushfd
	pop  edx			; edx = EFLAGS actually in effect
	xor  edx,eax
	jz   L_dont_use_mmx		; bit did not change: no CPUID
	; Leaf 0: vendor string must be "GenuineIntel" (ebx:edx:ecx).
	xor  eax,eax
	cpuid
	cmp  ebx,0756e6547h		; "Genu"
	jne  L_dont_use_mmx
	cmp  ecx,06c65746eh		; "ntel"
	jne  L_dont_use_mmx
	cmp  edx,049656e69h		; "ineI"
	jne  L_dont_use_mmx
	; Leaf 1: require family 6 (eax bits 8..11) and the MMX feature bit
	; (edx bit 23, mask 0800000h).
	mov  eax,1
	cpuid
	shr  eax,8
	and  eax,15
	cmp  eax,6
	jne  L_dont_use_mmx
	test  edx,0800000h
	jz   L_dont_use_mmx
L_use_mmx:
	mov  dword ptr [inflate_fast_use_mmx],2
	jmp  L_check_mmx_pop
L_dont_use_mmx:
	mov  dword ptr [inflate_fast_use_mmx],3
L_check_mmx_pop:
	pop  edx
	pop  ecx
	pop  ebx
	pop  eax
	jmp  L_check_mmx
; 426 "inffast.S"
; Integer decode loop.  Registers: esi = in, edi = out, ebp = hold,
; bl = number of valid bits in hold.
; A table entry is one dword: al = op, ah = code length in bits,
; upper 16 bits = value.
ALIGN 4
L_do_loop:
; 437 "inffast.S"
	cmp  bl,15
	ja   L_get_length_code		; more than 15 bits available already

	xor  eax,eax
	lodsw				; fetch 16 more input bits
	mov  cl,bl
	add  bl,16
	shl  eax,cl
	or  ebp,eax			; hold |= word << bits

L_get_length_code:
	mov  edx, [esp+0]		; lmask
	mov  ecx, [esp+8]		; lcode
	and  edx,ebp
	mov  eax, [ecx+edx*4]		; eax = lcode[hold & lmask]

L_dolen:
	mov  cl,ah
	sub  bl,ah			; bits -= code length
	shr  ebp,cl			; drop the code bits from hold

	test  al,al
	jnz   L_test_for_length_base	; op != 0: not a literal

	shr  eax,16
	stosb				; op == 0: emit the value as a literal byte

L_while_test:
	cmp  [esp+16],edi
	jbe  L_break_loop		; stop when end <= out

	cmp  [esp+20],esi
	ja   L_do_loop			; continue while last > in
	jmp  L_break_loop

; Length code: eax = table entry (al = op, upper 16 bits = base value).
; Result: [esp+24] = match length = base + extra bits taken from hold.
L_test_for_length_base:
; 502 "inffast.S"
	mov  edx,eax
	shr  edx,16			; edx = base length
	mov  cl,al

	test  al,16
	jz   L_test_for_second_level_length	; op bit 16 clear: not a length base
	and  cl,15			; cl = number of extra bits
	jz   L_save_len			; none: length is the base
	cmp  bl,cl
	jae  L_add_bits_to_len		; enough bits already in hold

	mov  ch,cl			; keep the extra-bit count across the refill
	xor  eax,eax
	lodsw				; fetch 16 more input bits
	mov  cl,bl
	add  bl,16
	shl  eax,cl
	or  ebp,eax
	mov  cl,ch

L_add_bits_to_len:
	mov  eax,1
	shl  eax,cl
	dec  eax			; eax = (1 << extra) - 1
	sub  bl,cl
	and  eax,ebp			; eax = extra bits
	shr  ebp,cl
	add  edx,eax			; length = base + extra

L_save_len:
	mov  [esp+24],edx		; len

; Distance code.  Same refill / lookup pattern as the length code, using
; dmask ([esp+4]) and dcode ([esp+12]).  Leaves edx = match distance and
; falls through to L_check_window.
L_decode_distance:
; 549 "inffast.S"
	cmp  bl,15
	ja   L_get_distance_code

	xor  eax,eax
	lodsw				; fetch 16 more input bits
	mov  cl,bl
	add  bl,16
	shl  eax,cl
	or  ebp,eax

L_get_distance_code:
	mov  edx, [esp+4]		; dmask
	mov  ecx, [esp+12]		; dcode
	and  edx,ebp
	mov  eax, [ecx+edx*4]		; eax = dcode[hold & dmask]


L_dodist:
	mov  edx,eax
	shr  edx,16			; edx = base distance
	mov  cl,ah
	sub  bl,ah			; bits -= code length
	shr  ebp,cl
; 584 "inffast.S"
	mov  cl,al

	test  al,16
	jz  L_test_for_second_level_dist	; op bit 16 clear: not a distance base
	and  cl,15			; cl = number of extra bits
	jz  L_check_dist_one		; none: distance is the base
	cmp  bl,cl
	jae  L_add_bits_to_dist

	mov  ch,cl			; keep the extra-bit count across the refill
	xor  eax,eax
	lodsw
	mov  cl,bl
	add  bl,16
	shl  eax,cl
	or  ebp,eax
	mov  cl,ch

L_add_bits_to_dist:
	mov  eax,1
	shl  eax,cl
	dec  eax			; eax = (1 << extra) - 1
	sub  bl,cl
	and  eax,ebp
	shr  ebp,cl
	add  edx,eax			; distance = base + extra
	; execution continues at L_check_window, the next label in the file

; Copy a match.  edx = distance, [esp+24] = length, edi = out,
; [esp+40] = beg.  If the distance reaches back before beg, the source lies
; (partly) in the sliding window and L_clip_window handles it.
L_check_window:
; 625 "inffast.S"
	mov  [esp+44],esi		; save the input pointer; esi is the copy source
	mov  eax,edi
	sub  eax, [esp+40]		; eax = out - beg

	cmp  eax,edx
	jb   L_clip_window		; distance > bytes available since beg

	mov  ecx, [esp+24]
	mov  esi,edi
	sub  esi,edx			; esi = out - distance

	; The first three bytes are copied inline, the rest with rep movsb.
	; NOTE(review): subtracting 3 unconditionally assumes length >= 3
	; (the DEFLATE minimum match length) -- confirm the tables guarantee it.
	sub  ecx,3
	mov  al, [esi]
	mov  [edi],al
	mov  al, [esi+1]
	mov  dl, [esi+2]
	add  esi,3
	mov  [edi+1],al
	mov  [edi+2],dl
	add  edi,3
	rep movsb

	mov  esi, [esp+44]		; restore the input pointer
	jmp  L_while_test

ALIGN 4
; Reached when the distance code has no extra bits.  Distance 1 with at
; least one byte already written (out != beg) is a run of the previous
; byte, done with rep stosb; every other case goes to the general copy.
L_check_dist_one:
	cmp  edx,1
	jne  L_check_window
	cmp  [esp+40],edi
	je  L_check_window		; out == beg: previous byte is not in this output run

	dec  edi
	mov  ecx, [esp+24]		; length
	mov  al, [edi]			; al = previous output byte
	sub  ecx,3			; three copies are stored inline below

	mov  [edi+1],al
	mov  [edi+2],al
	mov  [edi+3],al
	add  edi,4
	rep stosb			; remaining length - 3 copies

	jmp  L_while_test

ALIGN 4
; Length entry without op bit 16.  If op bit 64 is clear the entry links to
; a second-level table: index = value + (hold & ((1 << cl) - 1)), with
; cl = op on entry, looked up in lcode ([esp+8]).
L_test_for_second_level_length:
	test  al,64
	jnz   L_test_for_end_of_block

	mov  eax,1
	shl  eax,cl
	dec  eax
	and  eax,ebp			; low bits of hold
	add  eax,edx			; + value from the first-level entry
	mov  edx, [esp+8]
	mov  eax, [edx+eax*4]
	jmp  L_dolen

ALIGN 4
; Distance counterpart, looked up in dcode ([esp+12]).  Op bit 64 set means
; an invalid distance code.
L_test_for_second_level_dist:
	test  al,64
	jnz   L_invalid_distance_code

	mov  eax,1
	shl  eax,cl
	dec  eax
	and  eax,ebp
	add  eax,edx
	mov  edx, [esp+12]
	mov  eax, [edx+eax*4]
	jmp  L_dodist

ALIGN 4
; Match source starts in the sliding window.
; In:  eax = out - beg, edx = distance, [esp+24] = length, edi = out,
;      [esp+48] = write, [esp+52] = wsize, [esp+56] = window.
; ecx becomes the number of bytes to take from the window before the copy
; continues from the output buffer itself (out - distance).
L_clip_window:
; 721 "inffast.S"
	mov  ecx,eax
	mov  eax, [esp+52]		; eax = wsize
	neg  ecx
	mov  esi, [esp+56]		; esi = window

	cmp  eax,edx
	jb   L_invalid_distance_too_far	; wsize < distance

	add  ecx,edx			; ecx = distance - (out - beg)
	cmp  dword ptr [esp+48],0
	jne  L_wrap_around_window

	; write == 0: source is at window + wsize - ecx
	sub  eax,ecx
	add  esi,eax
; 749 "inffast.S"
	mov  eax, [esp+24]		; eax = length
	cmp  eax,ecx
	jbe  L_do_copy1			; whole match comes from the window

	sub  eax,ecx
	rep movsb			; window part
	mov  esi,edi
	sub  esi,edx			; remainder comes from out - distance
	jmp  L_do_copy1

L_wrap_around_window:
; 793 "inffast.S"
	mov  eax, [esp+48]		; eax = write
	cmp  ecx,eax
	jbe  L_contiguous_in_window

	; Source starts near the end of the window and wraps to its start.
	add  esi, [esp+52]
	add  esi,eax
	sub  esi,ecx			; esi = window + wsize + write - ecx
	sub  ecx,eax			; ecx = bytes before the window end


	mov  eax, [esp+24]
	cmp  eax,ecx
	jbe  L_do_copy1

	sub  eax,ecx
	rep movsb			; tail of the window
	mov  esi, [esp+56]		; continue at the window start
	mov  ecx, [esp+48]		; up to `write` bytes there
	cmp  eax,ecx
	jbe  L_do_copy1

	sub  eax,ecx
	rep movsb
	mov  esi,edi
	sub  esi,edx			; remainder comes from out - distance
	jmp  L_do_copy1

L_contiguous_in_window:
; 836 "inffast.S"
	add  esi,eax
	sub  esi,ecx			; esi = window + write - ecx


	mov  eax, [esp+24]
	cmp  eax,ecx
	jbe  L_do_copy1

	sub  eax,ecx
	rep movsb
	mov  esi,edi
	sub  esi,edx			; remainder comes from out - distance

L_do_copy1:
; 862 "inffast.S"
	mov  ecx,eax			; bytes still to copy
	rep movsb

	mov  esi, [esp+44]		; restore the input pointer
	jmp  L_while_test
; 878 "inffast.S"
; MMX decode loop.  Register use:
;   mm0 = hold (64-bit bit accumulator)
;   mm1 = bit count still to be shifted out of mm0 (applied by the next psrlq)
;   mm3 / mm4 = lmask (master copy / working copy)
;   mm2 / mm5 = dmask (master copy / working copy)
;   ebp = number of valid bits in hold, ebx = lcode, esi = in, edi = out
ALIGN 4
L_init_mmx:
	emms

	movd mm0,ebp			; hold moves into mm0
	mov  ebp,ebx			; bit count moves into ebp
; 896 "inffast.S"
	movd mm4,dword ptr [esp+0]	; lmask
	movq mm3,mm4
	movd mm5,dword ptr [esp+4]	; dmask
	movq mm2,mm5
	pxor mm1,mm1			; nothing pending to shift out
	mov  ebx, [esp+8]		; lcode
	jmp  L_do_loop_mmx

ALIGN 4
L_do_loop_mmx:
	psrlq mm0,mm1			; drop the bits consumed by the previous code

	cmp  ebp,32
	ja  L_get_length_code_mmx	; more than 32 bits available already

	movd mm6,ebp
	movd mm7,dword ptr [esi]	; fetch 32 more input bits
	add  esi,4
	psllq mm7,mm6
	add  ebp,32
	por mm0,mm7			; hold |= dword << bits

L_get_length_code_mmx:
	pand mm4,mm0
	movd eax,mm4			; eax = hold & lmask
	movq mm4,mm3			; restore the working lmask
	mov  eax, [ebx+eax*4]		; eax = lcode[hold & lmask]

L_dolen_mmx:
	movzx  ecx,ah			; ecx = code length in bits
	movd mm1,ecx			; shifted out of hold later
	sub  ebp,ecx

	test  al,al
	jnz L_test_for_length_base_mmx	; op != 0: not a literal

	shr  eax,16
	stosb				; op == 0: emit the value as a literal byte

L_while_test_mmx:
	cmp  [esp+16],edi
	jbe L_break_loop		; stop when end <= out

	cmp  [esp+20],esi
	ja L_do_loop_mmx		; continue while last > in
	jmp L_break_loop

; MMX path: finish the length code, then decode the distance code.
; On exit edx = match length and ebx = match distance.
; mm0 = hold, mm1 = pending shift count, ebp = bits in hold.
L_test_for_length_base_mmx:

	mov  edx,eax
	shr  edx,16			; edx = base length

	test  al,16
	jz  L_test_for_second_level_length_mmx
	and  eax,15			; eax = number of extra bits
	jz L_decode_distance_mmx

	psrlq mm0,mm1			; drop the code bits
	movd mm1,eax			; extra bits become the next pending shift
	movd ecx,mm0
	sub  ebp,eax
	and  ecx, [inflate_fast_mask+eax*4]	; ecx = extra bits
	add  edx,ecx			; length = base + extra

L_decode_distance_mmx:
	psrlq mm0,mm1

	cmp  ebp,32
	ja L_get_dist_code_mmx

	movd mm6,ebp
	movd mm7,dword ptr [esi]	; fetch 32 more input bits
	add  esi,4
	psllq mm7,mm6
	add  ebp,32
	por mm0,mm7

L_get_dist_code_mmx:
	mov  ebx, [esp+12]		; dcode (ebx is reloaded with lcode after the copy)
	pand mm5,mm0
	movd eax,mm5			; eax = hold & dmask
	movq mm5,mm2			; restore the working dmask
	mov  eax, [ebx+eax*4]		; eax = dcode[hold & dmask]

L_dodist_mmx:

	movzx  ecx,ah			; ecx = code length in bits
	mov  ebx,eax
	shr  ebx,16			; ebx = base distance
	sub  ebp,ecx
	movd mm1,ecx

	test  al,16
	jz L_test_for_second_level_dist_mmx
	and  eax,15			; eax = number of extra bits
	jz L_check_dist_one_mmx

L_add_bits_to_dist_mmx:
	psrlq mm0,mm1
	movd mm1,eax
	movd ecx,mm0
	sub  ebp,eax
	and  ecx, [inflate_fast_mask+eax*4]
	add  ebx,ecx			; distance = base + extra

; MMX path match copy.  ebx = distance, edx = length, edi = out,
; [esp+40] = beg.  ebx is reloaded with lcode ([esp+8]) before returning
; to the loop.
L_check_window_mmx:
	mov  [esp+44],esi		; save the input pointer; esi is the copy source
	mov  eax,edi
	sub  eax, [esp+40]		; eax = out - beg

	cmp  eax,ebx
	jb L_clip_window_mmx		; distance > bytes available since beg

	mov  ecx,edx
	mov  esi,edi
	sub  esi,ebx			; esi = out - distance

	; The first three bytes are copied inline, the rest with rep movsb.
	; NOTE(review): subtracting 3 unconditionally assumes length >= 3
	; (the DEFLATE minimum match length) -- confirm the tables guarantee it.
	sub  ecx,3
	mov  al, [esi]
	mov  [edi],al
	mov  al, [esi+1]
	mov  dl, [esi+2]
	add  esi,3
	mov  [edi+1],al
	mov  [edi+2],dl
	add  edi,3
	rep movsb

	mov  esi, [esp+44]		; restore the input pointer
	mov  ebx, [esp+8]		; restore lcode
	jmp  L_while_test_mmx

ALIGN 4
; Distance code without extra bits: distance 1 with out != beg is a run of
; the previous output byte.
L_check_dist_one_mmx:
	cmp  ebx,1
	jne  L_check_window_mmx
	cmp  [esp+40],edi
	je   L_check_window_mmx

	dec  edi
	mov  ecx,edx			; length
	mov  al, [edi]			; al = previous output byte
	sub  ecx,3			; three copies are stored inline below

	mov  [edi+1],al
	mov  [edi+2],al
	mov  [edi+3],al
	add  edi,4
	rep stosb

	mov  ebx, [esp+8]		; restore lcode
	jmp  L_while_test_mmx

ALIGN 4
; MMX path, length entry without op bit 16.  If op bit 64 is clear the
; entry links to a second-level table:
; index = value (edx) + (hold & mask[op & 15]), looked up in lcode (ebx).
L_test_for_second_level_length_mmx:
	test  al,64
	jnz L_test_for_end_of_block

	and  eax,15
	psrlq mm0,mm1			; drop the first-level code bits
	movd ecx,mm0
	and  ecx, [inflate_fast_mask+eax*4]
	add  ecx,edx
	mov  eax, [ebx+ecx*4]
	jmp L_dolen_mmx

ALIGN 4
; Distance counterpart: value is in ebx, table is dcode ([esp+12]).
; Op bit 64 set means an invalid distance code.
L_test_for_second_level_dist_mmx:
	test  al,64
	jnz L_invalid_distance_code

	and  eax,15
	psrlq mm0,mm1
	movd ecx,mm0
	and  ecx, [inflate_fast_mask+eax*4]
	mov  eax, [esp+12]
	add  ecx,ebx
	mov  eax, [eax+ecx*4]
	jmp  L_dodist_mmx

ALIGN 4
; MMX path, match source starts in the sliding window.
; In:  eax = out - beg, ebx = distance, edx = length, edi = out,
;      [esp+48] = write, [esp+52] = wsize, [esp+56] = window.
; ecx becomes the number of bytes to take from the window before the copy
; continues from the output buffer itself (out - distance).
L_clip_window_mmx:

	mov  ecx,eax
	mov  eax, [esp+52]		; eax = wsize
	neg  ecx
	mov  esi, [esp+56]		; esi = window

	cmp  eax,ebx
	jb  L_invalid_distance_too_far	; wsize < distance

	add  ecx,ebx			; ecx = distance - (out - beg)
	cmp  dword ptr [esp+48],0
	jne  L_wrap_around_window_mmx

	; write == 0: source is at window + wsize - ecx
	sub  eax,ecx
	add  esi,eax

	cmp  edx,ecx
	jbe  L_do_copy1_mmx		; whole match comes from the window

	sub  edx,ecx
	rep movsb			; window part
	mov  esi,edi
	sub  esi,ebx			; remainder comes from out - distance
	jmp  L_do_copy1_mmx

L_wrap_around_window_mmx:

	mov  eax, [esp+48]		; eax = write
	cmp  ecx,eax
	jbe  L_contiguous_in_window_mmx

	; Source starts near the end of the window and wraps to its start.
	add  esi, [esp+52]
	add  esi,eax
	sub  esi,ecx			; esi = window + wsize + write - ecx
	sub  ecx,eax			; ecx = bytes before the window end


	cmp  edx,ecx
	jbe  L_do_copy1_mmx

	sub  edx,ecx
	rep movsb			; tail of the window
	mov  esi, [esp+56]		; continue at the window start
	mov  ecx, [esp+48]		; up to `write` bytes there
	cmp  edx,ecx
	jbe  L_do_copy1_mmx

	sub  edx,ecx
	rep movsb
	mov  esi,edi
	sub  esi,ebx			; remainder comes from out - distance
	jmp  L_do_copy1_mmx

L_contiguous_in_window_mmx:

	add  esi,eax
	sub  esi,ecx			; esi = window + write - ecx


	cmp  edx,ecx
	jbe  L_do_copy1_mmx

	sub  edx,ecx
	rep movsb
	mov  esi,edi
	sub  esi,ebx			; remainder comes from out - distance

L_do_copy1_mmx:


	mov  ecx,edx			; bytes still to copy
	rep movsb

	mov  esi, [esp+44]		; restore the input pointer
	mov  ebx, [esp+8]		; restore lcode
	jmp  L_while_test_mmx
; 1174 "inffast.S"
; Exits shared by both decode loops.  Each one sets
;   ecx = address of an error message, or 0 for none
;   edx = new value for state->mode
; and then reaches L_update_stream_state.

L_invalid_distance_code:
	mov  ecx, invalid_distance_code_msg
	mov  edx,INFLATE_MODE_BAD
	jmp  L_update_stream_state

L_test_for_end_of_block:
	test  al,32
	jz  L_invalid_literal_length_code	; op bit 32 clear: not end-of-block

	mov  ecx,0			; end of block: no message
	mov  edx,INFLATE_MODE_TYPE
	jmp  L_update_stream_state

L_invalid_literal_length_code:
	mov  ecx, invalid_literal_length_code_msg
	mov  edx,INFLATE_MODE_BAD
	jmp  L_update_stream_state

L_invalid_distance_too_far:
	mov  esi, [esp+44]		; esi held a window pointer; restore the input pointer
	mov  ecx, invalid_distance_too_far_msg
	mov  edx,INFLATE_MODE_BAD
	; execution continues at L_update_stream_state, directly below

L_update_stream_state:

	mov  eax, [esp+88]		; eax = stream pointer (arg 1)
	test  ecx,ecx
	jz  L_skip_msg
	mov  [eax+24],ecx		; store the message pointer at stream offset 24
L_skip_msg:
	mov  eax, [eax+28]		; eax = state pointer
	mov  [eax+mode_state],edx	; state->mode = edx
	jmp  L_break_loop

ALIGN 4
; Common exit: write the decoder position back to the stream / state
; structures and return.
; On entry: esi = in, edi = out; bits is in ebx (integer loop) or ebp (MMX
; loop); hold is in ebp (integer loop) or mm0 with mm1 still pending (MMX).
; Stream offsets used: +0, +4, +12, +16, +28 (next_in, avail_in, next_out,
; avail_out, state in zlib.h's z_stream -- confirm against the zlib in use).
L_break_loop:
; 1243 "inffast.S"
	cmp  dword ptr [inflate_fast_use_mmx],2
	jne  L_update_next_in



	mov  ebx,ebp			; MMX loop kept the bit count in ebp

L_update_next_in:
; 1266 "inffast.S"
	mov  eax, [esp+88]		; eax = stream pointer
	mov  ecx,ebx
	mov  edx, [eax+28]		; edx = state pointer
	shr  ecx,3			; ecx = whole unused bytes held in hold
	sub  esi,ecx			; give them back to the input
	shl  ecx,3
	sub  ebx,ecx			; bits = bits mod 8
	mov  [eax+12],edi		; stream +12 = out
	mov  [edx+bits_state],ebx
	mov  ecx,ebx			; cl = bits, used for the hold mask below

	; If decoding ran from the scratch buffer ([esp+20] == &buffer),
	; translate esi back into the caller's input buffer and recompute last.
	lea  ebx, [esp+28]
	cmp  [esp+20],ebx
	jne  L_buf_not_used

	sub  esi,ebx			; bytes consumed from the scratch buffer
	mov  ebx, [eax+0]
	mov  [esp+20],ebx
	add  esi,ebx			; in = original next_in + bytes consumed
	mov  ebx, [eax+4]
	sub  ebx,11
	add  [esp+20],ebx		; last = next_in + avail_in - 11

L_buf_not_used:
	mov  [eax+0],esi		; stream +0 = in

	mov  ebx,1
	shl  ebx,cl
	dec  ebx			; ebx = (1 << bits) - 1





	cmp  dword ptr [inflate_fast_use_mmx],2
	jne  L_update_hold



	psrlq mm0,mm1			; apply the pending shift
	movd ebp,mm0			; hold back into ebp

	emms				; leave MMX state clean

L_update_hold:



	and  ebp,ebx			; keep only the valid bits
	mov  [edx+hold_state],ebp




	; stream +4 = last - in + 11.  Both branches produce the same 32-bit
	; result; they differ only in the order of the subtraction.
	mov  ebx, [esp+20]
	cmp  ebx,esi
	jbe  L_last_is_smaller

	sub  ebx,esi
	add  ebx,11
	mov  [eax+4],ebx
	jmp  L_fixup_out
L_last_is_smaller:
	sub  esi,ebx
	neg  esi
	add  esi,11
	mov  [eax+4],esi




L_fixup_out:

	; stream +16 = end - out + 257, same two-branch form as above.
	mov  ebx, [esp+16]
	cmp  ebx,edi
	jbe  L_end_is_smaller

	sub  ebx,edi
	add  ebx,257
	mov  [eax+16],ebx
	jmp  L_done
L_end_is_smaller:
	sub  edi,ebx
	neg  edi
	add  edi,257
	mov  [eax+16],edi





L_done:
	add  esp,64			; release the local frame
	popfd				; restore the caller's EFLAGS
	pop  ebx
	pop  ebp
	pop  esi
	pop  edi
	ret
_inflate_fast endp

_TEXT	ends
end