;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
; *
; * inffas32.asm is derived from inffas86.c, with translation of assembly code
; *
; * Copyright (C) 1995-2003 Mark Adler
; * For conditions of distribution and use, see copyright notice in zlib.h
; *
; * Copyright (C) 2003 Chris Anderson <christop@charm.net>
; * Please use the copyright conditions above.
; *
; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
; * the gcc -S output of zlib-1.2.0/inffast.c.  Zlib-1.2.0 is in beta release at
; * the moment.  I have successfully compiled and tested this code with gcc2.96,
; * gcc3.2, icc5.0, msvc6.0.  It is very close to the speed of inffast.S
; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
; * enabled.  I will attempt to merge the MMX code into this version.  Newer
; * versions of this and inffast.S can be found at
; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
; *
; * 2005 : modification by Gilles Vollant
; */
; For Visual C++ 4.x and higher and ML 6.x and higher
;   ml.exe is in directory \MASM611C of Win95 DDK
;   ml.exe is also distributed in http://www.masm32.com/masmdl.htm
;    and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
;
;
;   compile with command line option
;   ml  /coff /Zi /c /Flinffas32.lst inffas32.asm

;   if you define NO_GZIP (see inflate.h), compile with
;   ml  /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm
;
; Syntax : MASM (Intel operand order), 32-bit flat model.
; ABI    : cdecl; both arguments are read from the stack, nothing is returned
;          in a register.  edi, esi, ebp, ebx and EFLAGS are saved/restored;
;          eax, ecx, edx and (on the MMX path) mm0-mm7 are clobbered.


; zlib1222sup is 0 for zlib 1.2.2.1 and lower
; zlib1222sup is 8 for zlib 1.2.2.2 and more (with addition of dmax and head
;        in inflate_state in inflate.h)
zlib1222sup      equ    8


; Values written to state->mode on exit; they depend on whether the C side
; was built with gzip support.
IFDEF GUNZIP
  INFLATE_MODE_TYPE    equ 11
  INFLATE_MODE_BAD     equ 26
ELSE
  IFNDEF NO_GUNZIP
    INFLATE_MODE_TYPE    equ 11
    INFLATE_MODE_BAD     equ 26
  ELSE
    INFLATE_MODE_TYPE    equ 3
    INFLATE_MODE_BAD     equ 17
  ENDIF
ENDIF


; ---------------------------------------------------------------------------
; Offsets into the inflate state structure (reached through [strm+state_strm])
; ---------------------------------------------------------------------------
mode_state       equ    0                       ;/* state->mode     */
wsize_state      equ    (32+zlib1222sup)        ;/* state->wsize    */
write_state      equ    (36+4+zlib1222sup)      ;/* state->write    */
window_state     equ    (40+4+zlib1222sup)      ;/* state->window   */
hold_state       equ    (44+4+zlib1222sup)      ;/* state->hold     */
bits_state       equ    (48+4+zlib1222sup)      ;/* state->bits     */
lencode_state    equ    (64+4+zlib1222sup)      ;/* state->lencode  */
distcode_state   equ    (68+4+zlib1222sup)      ;/* state->distcode */
lenbits_state    equ    (72+4+zlib1222sup)      ;/* state->lenbits  */
distbits_state   equ    (76+4+zlib1222sup)      ;/* state->distbits */

; ---------------------------------------------------------------------------
; Offsets into the stream structure passed as first argument.
; NOTE(review): names follow the 32-bit z_stream layout in zlib.h -- confirm
; against the zlib.h this is linked with.
; ---------------------------------------------------------------------------
next_in_strm     equ    0
avail_in_strm    equ    4
next_out_strm    equ    12
avail_out_strm   equ    16
msg_strm         equ    24
state_strm       equ    28

; ---------------------------------------------------------------------------
; Stack frame, relative to esp after the prologue (4 pushes + pushfd +
; local_var_size bytes of locals).
; ---------------------------------------------------------------------------
local_var_size   equ    64
lmask_sp         equ    0       ; (1 << lenbits) - 1
dmask_sp         equ    4       ; (1 << distbits) - 1
lcode_sp         equ    8       ; state->lencode
dcode_sp         equ    12      ; state->distcode
end_sp           equ    16      ; next_out + avail_out - out_margin
last_sp          equ    20      ; next_in + avail_in - in_margin
len_sp           equ    24      ; current match length (non-MMX path)
buf_sp           equ    28      ; in_buf_size-byte private input buffer (28..39)
beg_sp           equ    40      ; next_out - (start - avail_out)
in_sp            equ    44      ; saved input pointer while esi is borrowed
write_sp         equ    48      ; state->write
wsize_sp         equ    52      ; state->wsize
window_sp        equ    56      ; state->window
out_sp           equ    60      ; initial next_out
strm_sp          equ    88      ; 1st argument: 64 locals + 20 saved + 4 return
start_sp         equ    92      ; 2nd argument

in_margin        equ    11      ; loop runs only while in  < next_in + avail_in - 11
in_buf_size      equ    12      ; size of the zero-padded private input buffer
out_margin       equ    257     ; loop runs only while out < next_out + avail_out - 257


    .586p
    .mmx

    name    inflate_fast_x86
    .MODEL  FLAT

_DATA           segment
; MMX dispatch flag: 1 = not probed yet, 2 = use the MMX loop,
; 3 = use the integer loop.  Probed once on the first call.
inflate_fast_use_mmx:
        dd      1
_DATA           ends


_TEXT           segment

ALIGN 4
        db      'Fast decoding Code from Chris Anderson'
        db      0

ALIGN 4
invalid_literal_length_code_msg:
        db      'invalid literal/length code'
        db      0

ALIGN 4
invalid_distance_code_msg:
        db      'invalid distance code'
        db      0

ALIGN 4
invalid_distance_too_far_msg:
        db      'invalid distance too far back'
        db      0


; inflate_fast_mask[n] = (1 << n) - 1 for n = 0..32
ALIGN 4
inflate_fast_mask:
        dd      0
        dd      1
        dd      3
        dd      7
        dd      15
        dd      31
        dd      63
        dd      127
        dd      255
        dd      511
        dd      1023
        dd      2047
        dd      4095
        dd      8191
        dd      16383
        dd      32767
        dd      65535
        dd      131071
        dd      262143
        dd      524287
        dd      1048575
        dd      2097151
        dd      4194303
        dd      8388607
        dd      16777215
        dd      33554431
        dd      67108863
        dd      134217727
        dd      268435455
        dd      536870911
        dd      1073741823
        dd      2147483647
        dd      4294967295


;-----------------------------------------------------------------------
; _inflate_fast(strm, start)                       (cdecl, stack args)
;
; In:    [esp+4] = strm   stream structure (see *_strm offsets)
;        [esp+8] = start  compared against avail_out to derive beg_sp
; Out:   strm next_in/avail_in/next_out/avail_out updated,
;        state hold/bits updated; on end-of-block or error state->mode
;        is set, and on error strm msg points at one of the strings above.
;
; Code table entries are 32 bits: al = op, ah = bits, upper 16 = val.
;
; Integer loop registers:  esi = in, edi = out, ebp = hold, bl = bits,
;                          edx = len / dist, eax/ecx = scratch.
; MMX loop registers:      esi = in, edi = out, mm0 = hold, ebp = bits,
;                          mm1 = shift still to apply to mm0,
;                          mm4/mm3 = lmask (work/copy), mm5/mm2 = dmask
;                          (work/copy), ebx = lencode table or dist,
;                          edx = len.
;-----------------------------------------------------------------------
ALIGN 4
_inflate_fast proc near
.FPO (16, 4, 0, 0, 1, 0)
        push    edi
        push    esi
        push    ebp
        push    ebx
        pushfd                                  ; direction flag is changed below
        sub     esp, local_var_size
        cld

        mov     esi, [esp+strm_sp]              ; esi = strm
        mov     edi, [esi+state_strm]           ; edi = state

        ; last = next_in + avail_in - in_margin
        mov     edx, [esi+avail_in_strm]
        mov     eax, [esi+next_in_strm]
        add     edx, eax
        sub     edx, in_margin
        mov     [esp+in_sp], eax
        mov     [esp+last_sp], edx

        ; beg = next_out - (start - avail_out)
        ; end = next_out + (avail_out - out_margin)
        mov     ebp, [esp+start_sp]
        mov     ecx, [esi+avail_out_strm]
        mov     ebx, [esi+next_out_strm]
        sub     ebp, ecx
        neg     ebp
        add     ebp, ebx
        sub     ecx, out_margin
        add     ecx, ebx
        mov     [esp+out_sp], ebx
        mov     [esp+beg_sp], ebp
        mov     [esp+end_sp], ecx

        ; cache the code tables and their index masks
        mov     eax, [edi+lencode_state]
        mov     ecx, [edi+distcode_state]
        mov     [esp+lcode_sp], eax
        mov     [esp+dcode_sp], ecx

        mov     eax, 1
        mov     ecx, [edi+lenbits_state]
        shl     eax, cl
        dec     eax
        mov     [esp+lmask_sp], eax             ; (1 << lenbits) - 1

        mov     eax, 1
        mov     ecx, [edi+distbits_state]
        shl     eax, cl
        dec     eax
        mov     [esp+dmask_sp], eax             ; (1 << distbits) - 1

        ; cache the sliding-window description
        mov     eax, [edi+wsize_state]
        mov     ecx, [edi+write_state]
        mov     edx, [edi+window_state]
        mov     [esp+wsize_sp], eax
        mov     [esp+write_sp], ecx
        mov     [esp+window_sp], edx

        mov     ebp, [edi+hold_state]           ; ebp = hold
        mov     ebx, [edi+bits_state]           ; ebx = bits

        mov     esi, [esp+in_sp]
        mov     ecx, [esp+last_sp]
        cmp     ecx, esi
        ja      L_align_long                    ; last > in: read the caller's buffer

        ; last <= in: copy the remaining input into buf_sp, zero-pad it to
        ; in_buf_size bytes and decode from there.  last_sp == &buf marks
        ; this case for the epilogue.
        add     ecx, in_margin
        sub     ecx, esi                        ; ecx = avail_in
        mov     eax, in_buf_size
        sub     eax, ecx                        ; eax = pad length
        lea     edi, [esp+buf_sp]
        rep movsb
        mov     ecx, eax
        xor     eax, eax
        rep stosb
        lea     esi, [esp+buf_sp]
        mov     [esp+last_sp], esi
        jmp     L_is_aligned

L_align_long:
        ; feed single bytes into hold until esi is 4-byte aligned
        test    esi, 3
        jz      L_is_aligned
        xor     eax, eax
        mov     al, [esi]
        inc     esi
        mov     ecx, ebx
        add     ebx, 8
        shl     eax, cl
        or      ebp, eax
        jmp     L_align_long

L_is_aligned:
        mov     edi, [esp+out_sp]               ; edi = out

L_check_mmx:
        cmp     dword ptr [inflate_fast_use_mmx], 2
        je      L_init_mmx                      ; 2: MMX loop
        ja      L_do_loop                       ; 3: integer loop

        ; 1: first call -- probe the CPU, store 2 or 3, then re-dispatch
        push    eax
        push    ebx
        push    ecx
        push    edx
        pushfd
        mov     eax, [esp]
        xor     dword ptr [esp], 0200000h       ; try to toggle EFLAGS.ID
        popfd
        pushfd
        pop     edx
        xor     edx, eax
        jz      L_dont_use_mmx                  ; ID bit stuck: no CPUID
        xor     eax, eax
        cpuid
        cmp     ebx, 0756e6547h                 ; "Genu"
        jne     L_dont_use_mmx
        cmp     ecx, 06c65746eh                 ; "ntel"
        jne     L_dont_use_mmx
        cmp     edx, 049656e69h                 ; "ineI"
        jne     L_dont_use_mmx
        mov     eax, 1
        cpuid
        shr     eax, 8
        and     eax, 15                         ; family
        cmp     eax, 6
        jne     L_dont_use_mmx
        test    edx, 0800000h                   ; feature bit 23 = MMX
        jz      L_dont_use_mmx
L_use_mmx:
        mov     dword ptr [inflate_fast_use_mmx], 2
        jmp     L_check_mmx_pop
L_dont_use_mmx:
        mov     dword ptr [inflate_fast_use_mmx], 3
L_check_mmx_pop:
        pop     edx
        pop     ecx
        pop     ebx
        pop     eax
        jmp     L_check_mmx

; ======================= integer decode loop ==========================
ALIGN 4
L_do_loop:
        cmp     bl, 15
        ja      L_get_length_code               ; more than 15 bits already held

        xor     eax, eax
        lodsw                                   ; refill hold with 16 bits
        mov     cl, bl
        add     bl, 16
        shl     eax, cl
        or      ebp, eax

L_get_length_code:
        mov     edx, [esp+lmask_sp]
        mov     ecx, [esp+lcode_sp]
        and     edx, ebp
        mov     eax, [ecx+edx*4]                ; eax = lcode[hold & lmask]

L_dolen:
        mov     cl, ah
        sub     bl, ah                          ; bits -= entry.bits
        shr     ebp, cl                         ; hold >>= entry.bits

        test    al, al
        jnz     L_test_for_length_base          ; op != 0: not a literal

        shr     eax, 16
        stosb                                   ; *out++ = literal

L_while_test:
        cmp     [esp+end_sp], edi
        jbe     L_break_loop                    ; out >= end

        cmp     [esp+last_sp], esi
        ja      L_do_loop                       ; in < last
        jmp     L_break_loop

L_test_for_length_base:
        mov     edx, eax
        shr     edx, 16                         ; edx = entry.val (length base)
        mov     cl, al

        test    al, 16
        jz      L_test_for_second_level_length
        and     cl, 15                          ; cl = number of extra bits
        jz      L_save_len
        cmp     bl, cl
        jae     L_add_bits_to_len

        mov     ch, cl                          ; keep extra-bit count over refill
        xor     eax, eax
        lodsw
        mov     cl, bl
        add     bl, 16
        shl     eax, cl
        or      ebp, eax
        mov     cl, ch

L_add_bits_to_len:
        mov     eax, 1
        shl     eax, cl
        dec     eax
        sub     bl, cl
        and     eax, ebp
        shr     ebp, cl
        add     edx, eax                        ; len += extra bits

L_save_len:
        mov     [esp+len_sp], edx

L_decode_distance:
        cmp     bl, 15
        ja      L_get_distance_code

        xor     eax, eax
        lodsw
        mov     cl, bl
        add     bl, 16
        shl     eax, cl
        or      ebp, eax

L_get_distance_code:
        mov     edx, [esp+dmask_sp]
        mov     ecx, [esp+dcode_sp]
        and     edx, ebp
        mov     eax, [ecx+edx*4]                ; eax = dcode[hold & dmask]

L_dodist:
        mov     edx, eax
        shr     edx, 16                         ; edx = entry.val (distance base)
        mov     cl, ah
        sub     bl, ah
        shr     ebp, cl
        mov     cl, al

        test    al, 16
        jz      L_test_for_second_level_dist
        and     cl, 15                          ; cl = number of extra bits
        jz      L_check_dist_one
        cmp     bl, cl
        jae     L_add_bits_to_dist

        mov     ch, cl
        xor     eax, eax
        lodsw
        mov     cl, bl
        add     bl, 16
        shl     eax, cl
        or      ebp, eax
        mov     cl, ch

L_add_bits_to_dist:
        mov     eax, 1
        shl     eax, cl
        dec     eax
        sub     bl, cl
        and     eax, ebp
        shr     ebp, cl
        add     edx, eax                        ; dist += extra bits
        ; falls through

L_check_window:
        mov     [esp+in_sp], esi                ; esi is borrowed as copy source
        mov     eax, edi
        sub     eax, [esp+beg_sp]               ; eax = out - beg

        cmp     eax, edx
        jb      L_clip_window                   ; dist reaches before beg

        mov     ecx, [esp+len_sp]
        mov     esi, edi
        sub     esi, edx                        ; from = out - dist

        ; first three bytes by hand, the remaining len-3 with rep movsb
        sub     ecx, 3
        mov     al, [esi]
        mov     [edi], al
        mov     al, [esi+1]
        mov     dl, [esi+2]
        add     esi, 3
        mov     [edi+1], al
        mov     [edi+2], dl
        add     edi, 3
        rep movsb

        mov     esi, [esp+in_sp]
        jmp     L_while_test

ALIGN 4
L_check_dist_one:
        ; no extra bits: special-case dist == 1 (byte fill) unless out == beg
        cmp     edx, 1
        jne     L_check_window
        cmp     [esp+beg_sp], edi
        je      L_check_window

        dec     edi
        mov     ecx, [esp+len_sp]
        mov     al, [edi]                       ; al = previous output byte
        sub     ecx, 3

        mov     [edi+1], al
        mov     [edi+2], al
        mov     [edi+3], al
        add     edi, 4
        rep stosb

        jmp     L_while_test

ALIGN 4
L_test_for_second_level_length:
        test    al, 64
        jnz     L_test_for_end_of_block

        ; second-level lookup: lcode[val + (hold & ((1 << op) - 1))]
        mov     eax, 1
        shl     eax, cl
        dec     eax
        and     eax, ebp
        add     eax, edx
        mov     edx, [esp+lcode_sp]
        mov     eax, [edx+eax*4]
        jmp     L_dolen

ALIGN 4
L_test_for_second_level_dist:
        test    al, 64
        jnz     L_invalid_distance_code

        ; second-level lookup: dcode[val + (hold & ((1 << op) - 1))]
        mov     eax, 1
        shl     eax, cl
        dec     eax
        and     eax, ebp
        add     eax, edx
        mov     edx, [esp+dcode_sp]
        mov     eax, [edx+eax*4]
        jmp     L_dodist

ALIGN 4
L_clip_window:
        ; on entry: eax = out - beg, edx = dist, dist > out - beg
        mov     ecx, eax
        mov     eax, [esp+wsize_sp]
        neg     ecx
        mov     esi, [esp+window_sp]

        cmp     eax, edx
        jb      L_invalid_distance_too_far      ; wsize < dist

        add     ecx, edx                        ; ecx = dist - (out - beg)
        cmp     dword ptr [esp+write_sp], 0
        jne     L_wrap_around_window

        ; write == 0: from = window + wsize - ecx
        sub     eax, ecx
        add     esi, eax

        mov     eax, [esp+len_sp]
        cmp     eax, ecx
        jbe     L_do_copy1                      ; whole match lies in the window

        sub     eax, ecx
        rep movsb                               ; window part
        mov     esi, edi
        sub     esi, edx                        ; rest comes from the output
        jmp     L_do_copy1

L_wrap_around_window:
        mov     eax, [esp+write_sp]
        cmp     ecx, eax
        jbe     L_contiguous_in_window

        ; ecx > write: start at window + wsize + write - ecx
        add     esi, [esp+wsize_sp]
        add     esi, eax
        sub     esi, ecx
        sub     ecx, eax                        ; bytes up to the end of the window

        mov     eax, [esp+len_sp]
        cmp     eax, ecx
        jbe     L_do_copy1

        sub     eax, ecx
        rep movsb                               ; tail of the window
        mov     esi, [esp+window_sp]
        mov     ecx, [esp+write_sp]
        cmp     eax, ecx
        jbe     L_do_copy1

        sub     eax, ecx
        rep movsb                               ; head of the window
        mov     esi, edi
        sub     esi, edx
        jmp     L_do_copy1

L_contiguous_in_window:
        ; ecx <= write: from = window + write - ecx
        add     esi, eax
        sub     esi, ecx

        mov     eax, [esp+len_sp]
        cmp     eax, ecx
        jbe     L_do_copy1

        sub     eax, ecx
        rep movsb
        mov     esi, edi
        sub     esi, edx

L_do_copy1:
        mov     ecx, eax                        ; eax = bytes still to copy
        rep movsb

        mov     esi, [esp+in_sp]
        jmp     L_while_test

; ========================= MMX decode loop ============================
ALIGN 4
L_init_mmx:
        emms

        movd    mm0, ebp                        ; mm0 = hold
        mov     ebp, ebx                        ; ebp = bits

        movd    mm4, dword ptr [esp+lmask_sp]
        movq    mm3, mm4                        ; mm3 = pristine lmask
        movd    mm5, dword ptr [esp+dmask_sp]
        movq    mm2, mm5                        ; mm2 = pristine dmask
        pxor    mm1, mm1                        ; no shift pending
        mov     ebx, [esp+lcode_sp]
        jmp     L_do_loop_mmx

ALIGN 4
L_do_loop_mmx:
        psrlq   mm0, mm1                        ; apply the pending shift

        cmp     ebp, 32
        ja      L_get_length_code_mmx

        movd    mm6, ebp
        movd    mm7, dword ptr [esi]            ; refill hold with 32 bits
        add     esi, 4
        psllq   mm7, mm6
        add     ebp, 32
        por     mm0, mm7

L_get_length_code_mmx:
        pand    mm4, mm0
        movd    eax, mm4
        movq    mm4, mm3
        mov     eax, [ebx+eax*4]                ; eax = lcode[hold & lmask]

L_dolen_mmx:
        movzx   ecx, ah
        movd    mm1, ecx                        ; shift by entry.bits later
        sub     ebp, ecx

        test    al, al
        jnz     L_test_for_length_base_mmx

        shr     eax, 16
        stosb                                   ; *out++ = literal

L_while_test_mmx:
        cmp     [esp+end_sp], edi
        jbe     L_break_loop

        cmp     [esp+last_sp], esi
        ja      L_do_loop_mmx
        jmp     L_break_loop

L_test_for_length_base_mmx:
        mov     edx, eax
        shr     edx, 16                         ; edx = length base

        test    al, 16
        jz      L_test_for_second_level_length_mmx
        and     eax, 15                         ; eax = number of extra bits
        jz      L_decode_distance_mmx

        psrlq   mm0, mm1
        movd    mm1, eax
        movd    ecx, mm0
        sub     ebp, eax
        and     ecx, [inflate_fast_mask+eax*4]
        add     edx, ecx                        ; len += extra bits

L_decode_distance_mmx:
        psrlq   mm0, mm1

        cmp     ebp, 32
        ja      L_get_dist_code_mmx

        movd    mm6, ebp
        movd    mm7, dword ptr [esi]
        add     esi, 4
        psllq   mm7, mm6
        add     ebp, 32
        por     mm0, mm7

L_get_dist_code_mmx:
        mov     ebx, [esp+dcode_sp]
        pand    mm5, mm0
        movd    eax, mm5
        movq    mm5, mm2
        mov     eax, [ebx+eax*4]                ; eax = dcode[hold & dmask]

L_dodist_mmx:
        movzx   ecx, ah
        mov     ebx, eax
        shr     ebx, 16                         ; ebx = distance base
        sub     ebp, ecx
        movd    mm1, ecx

        test    al, 16
        jz      L_test_for_second_level_dist_mmx
        and     eax, 15                         ; eax = number of extra bits
        jz      L_check_dist_one_mmx

L_add_bits_to_dist_mmx:
        psrlq   mm0, mm1
        movd    mm1, eax
        movd    ecx, mm0
        sub     ebp, eax
        and     ecx, [inflate_fast_mask+eax*4]
        add     ebx, ecx                        ; dist += extra bits

L_check_window_mmx:
        mov     [esp+in_sp], esi
        mov     eax, edi
        sub     eax, [esp+beg_sp]               ; eax = out - beg

        cmp     eax, ebx
        jb      L_clip_window_mmx

        mov     ecx, edx                        ; ecx = len
        mov     esi, edi
        sub     esi, ebx                        ; from = out - dist

        sub     ecx, 3
        mov     al, [esi]
        mov     [edi], al
        mov     al, [esi+1]
        mov     dl, [esi+2]
        add     esi, 3
        mov     [edi+1], al
        mov     [edi+2], dl
        add     edi, 3
        rep movsb

        mov     esi, [esp+in_sp]
        mov     ebx, [esp+lcode_sp]             ; ebx back to the length table
        jmp     L_while_test_mmx

ALIGN 4
L_check_dist_one_mmx:
        cmp     ebx, 1
        jne     L_check_window_mmx
        cmp     [esp+beg_sp], edi
        je      L_check_window_mmx

        dec     edi
        mov     ecx, edx
        mov     al, [edi]
        sub     ecx, 3

        mov     [edi+1], al
        mov     [edi+2], al
        mov     [edi+3], al
        add     edi, 4
        rep stosb

        mov     ebx, [esp+lcode_sp]
        jmp     L_while_test_mmx

ALIGN 4
L_test_for_second_level_length_mmx:
        test    al, 64
        jnz     L_test_for_end_of_block

        and     eax, 15
        psrlq   mm0, mm1
        movd    ecx, mm0
        and     ecx, [inflate_fast_mask+eax*4]
        add     ecx, edx
        mov     eax, [ebx+ecx*4]
        jmp     L_dolen_mmx

ALIGN 4
L_test_for_second_level_dist_mmx:
        test    al, 64
        jnz     L_invalid_distance_code

        and     eax, 15
        psrlq   mm0, mm1
        movd    ecx, mm0
        and     ecx, [inflate_fast_mask+eax*4]
        mov     eax, [esp+dcode_sp]
        add     ecx, ebx
        mov     eax, [eax+ecx*4]
        jmp     L_dodist_mmx

ALIGN 4
L_clip_window_mmx:
        ; on entry: eax = out - beg, ebx = dist, edx = len
        mov     ecx, eax
        mov     eax, [esp+wsize_sp]
        neg     ecx
        mov     esi, [esp+window_sp]

        cmp     eax, ebx
        jb      L_invalid_distance_too_far      ; wsize < dist

        add     ecx, ebx                        ; ecx = dist - (out - beg)
        cmp     dword ptr [esp+write_sp], 0
        jne     L_wrap_around_window_mmx

        sub     eax, ecx
        add     esi, eax                        ; from = window + wsize - ecx

        cmp     edx, ecx
        jbe     L_do_copy1_mmx

        sub     edx, ecx
        rep movsb
        mov     esi, edi
        sub     esi, ebx
        jmp     L_do_copy1_mmx

L_wrap_around_window_mmx:
        mov     eax, [esp+write_sp]
        cmp     ecx, eax
        jbe     L_contiguous_in_window_mmx

        add     esi, [esp+wsize_sp]
        add     esi, eax
        sub     esi, ecx
        sub     ecx, eax

        cmp     edx, ecx
        jbe     L_do_copy1_mmx

        sub     edx, ecx
        rep movsb
        mov     esi, [esp+window_sp]
        mov     ecx, [esp+write_sp]
        cmp     edx, ecx
        jbe     L_do_copy1_mmx

        sub     edx, ecx
        rep movsb
        mov     esi, edi
        sub     esi, ebx
        jmp     L_do_copy1_mmx

L_contiguous_in_window_mmx:
        add     esi, eax
        sub     esi, ecx

        cmp     edx, ecx
        jbe     L_do_copy1_mmx

        sub     edx, ecx
        rep movsb
        mov     esi, edi
        sub     esi, ebx

L_do_copy1_mmx:
        mov     ecx, edx                        ; edx = bytes still to copy
        rep movsb

        mov     esi, [esp+in_sp]
        mov     ebx, [esp+lcode_sp]
        jmp     L_while_test_mmx

; ===================== exits shared by both loops =====================
; ecx = message pointer (0 for none), edx = new state->mode
L_invalid_distance_code:
        mov     ecx, invalid_distance_code_msg
        mov     edx, INFLATE_MODE_BAD
        jmp     L_update_stream_state

L_test_for_end_of_block:
        test    al, 32
        jz      L_invalid_literal_length_code

        xor     ecx, ecx                        ; end of block: no message
        mov     edx, INFLATE_MODE_TYPE
        jmp     L_update_stream_state

L_invalid_literal_length_code:
        mov     ecx, invalid_literal_length_code_msg
        mov     edx, INFLATE_MODE_BAD
        jmp     L_update_stream_state

L_invalid_distance_too_far:
        mov     esi, [esp+in_sp]                ; esi was borrowed for the window
        mov     ecx, invalid_distance_too_far_msg
        mov     edx, INFLATE_MODE_BAD
        ; falls through

L_update_stream_state:
        mov     eax, [esp+strm_sp]
        test    ecx, ecx
        jz      L_skip_msg
        mov     [eax+msg_strm], ecx
L_skip_msg:
        mov     eax, [eax+state_strm]
        mov     [eax+mode_state], edx
        jmp     L_break_loop

ALIGN 4
L_break_loop:
        cmp     dword ptr [inflate_fast_use_mmx], 2
        jne     L_update_next_in

        mov     ebx, ebp                        ; MMX loop kept bits in ebp

L_update_next_in:
        ; give back the whole unused bytes held in hold:
        ;   in -= bits >> 3;  bits &= 7
        mov     eax, [esp+strm_sp]
        mov     ecx, ebx
        mov     edx, [eax+state_strm]
        shr     ecx, 3
        sub     esi, ecx
        shl     ecx, 3
        sub     ebx, ecx
        mov     [eax+next_out_strm], edi
        mov     [edx+bits_state], ebx
        mov     ecx, ebx                        ; cl = bits, used for the mask below

        lea     ebx, [esp+buf_sp]
        cmp     [esp+last_sp], ebx
        jne     L_buf_not_used

        ; input was decoded from buf_sp: map esi back into the caller's
        ; buffer and rebuild last = next_in + avail_in - in_margin
        sub     esi, ebx
        mov     ebx, [eax+next_in_strm]
        mov     [esp+last_sp], ebx
        add     esi, ebx
        mov     ebx, [eax+avail_in_strm]
        sub     ebx, in_margin
        add     [esp+last_sp], ebx

L_buf_not_used:
        mov     [eax+next_in_strm], esi

        mov     ebx, 1
        shl     ebx, cl
        dec     ebx                             ; ebx = (1 << bits) - 1

        cmp     dword ptr [inflate_fast_use_mmx], 2
        jne     L_update_hold

        psrlq   mm0, mm1                        ; apply the pending shift
        movd    ebp, mm0                        ; ebp = hold

        emms

L_update_hold:
        and     ebp, ebx
        mov     [edx+hold_state], ebp           ; hold &= (1 << bits) - 1

        ; avail_in = last - in + in_margin
        mov     ebx, [esp+last_sp]
        cmp     ebx, esi
        jbe     L_last_is_smaller

        sub     ebx, esi
        add     ebx, in_margin
        mov     [eax+avail_in_strm], ebx
        jmp     L_fixup_out
L_last_is_smaller:
        sub     esi, ebx
        neg     esi
        add     esi, in_margin
        mov     [eax+avail_in_strm], esi

L_fixup_out:
        ; avail_out = end - out + out_margin
        mov     ebx, [esp+end_sp]
        cmp     ebx, edi
        jbe     L_end_is_smaller

        sub     ebx, edi
        add     ebx, out_margin
        mov     [eax+avail_out_strm], ebx
        jmp     L_done
L_end_is_smaller:
        sub     edi, ebx
        neg     edi
        add     edi, out_margin
        mov     [eax+avail_out_strm], edi

L_done:
        add     esp, local_var_size
        popfd
        pop     ebx
        pop     ebp
        pop     esi
        pop     edi
        ret
_inflate_fast endp

_TEXT           ends
end