/* Copyright (c) 1990-2007 Info-ZIP. All rights reserved. See the accompanying file LICENSE, version 2000-Apr-09 or later (the contents of which are also included in zip.h) for terms of use. If, for some reason, all these files are missing, the Info-ZIP license also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html */ /* crc_i386.c -- Microsoft 32-bit C/C++ adaptation of crc_i386.asm * Created by Rodney Brown from crc_i386.asm, modified by Chr. Spieler. * Last revised: 07-Jan-2007 * * Original coded (in crc_i386.asm) and put into the public domain * by Paul Kienitz and Christian Spieler. * * Revised 06-Oct-96, Scott Field (sfield@microsoft.com) * fixed to assemble with masm by not using .model directive which makes * assumptions about segment alignment. Also, * avoid using loop, and j[e]cxz where possible. Use mov + inc, rather * than lodsb, and other misc. changes resulting in the following performance * increases: * * unrolled loops NO_UNROLLED_LOOPS * *8 >8 <8 *8 >8 <8 * * +54% +42% +35% +82% +52% +25% * * first item in each table is input buffer length, even multiple of 8 * second item in each table is input buffer length, > 8 * third item in each table is input buffer length, < 8 * * Revised 02-Apr-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au) * Incorporated Rodney Brown's 32-bit-reads optimization as found in the * UNIX AS source crc_i386.S. This new code can be disabled by defining * the macro symbol NO_32_BIT_LOADS. * * Revised 12-Oct-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au) * Incorporated Rodney Brown's additional tweaks for 32-bit-optimized CPUs * (like the Pentium Pro, Pentium II, and probably some Pentium clones). * This optimization is controlled by the macro symbol __686 and is disabled * by default. (This default is based on the assumption that most users * do not yet work on a Pentium Pro or Pentium II machine ...) * * Revised 16-Nov-97, Chr. Spieler: Made code compatible with Borland C++ * 32-bit, removed unneeded kludge for potentially unknown movzx mnemonic, * confirmed correct working with MS VC++ (32-bit). * * Revised 22-May-98, Peter Kunath, Chr. Spieler: The 16-Nov-97 revision broke * MSVC 5.0. Inside preprocessor macros, each instruction is enclosed in its * own __asm {...} construct. For MSVC, a "#pragma warning" was added to * shut up the "no return value" warning message. * * Revised 13-Dec-98, Chr. Spieler: Modified path to "zip.h" header file. * * Revised 16-Jan-2005, Cosmin Truta: Added the ASM_CRC guard, for easier * switching between ASM vs. non-ASM builds, when handling makefiles. * Also enabled the 686 build by default, because there are hardly any * pre-686 CPUs in serious use nowadays. (See the 12-Oct-97 note above.) * * Revised 03-Jan-2006, Chr. Spieler * Enlarged unrolling loops to "do 16 bytes per turn"; optimized access to * data buffer in loop body (adjust pointer only once in loop body and use * offsets to access each item); added additional support for the "unfolded * tables" optimization variant (enabled by IZ_CRCOPTIM_UNFOLDTBL). * * Revised 07-Jan-2007, Chr. Spieler * Recognize additional conditional flag CRC_TABLE_ONLY that prevents * compilation of the crc32() function. * * FLAT memory model assumed. * * Loop unrolling can be disabled by defining the macro NO_UNROLLED_LOOPS. * This results in shorter code at the expense of reduced performance. * */ #include "../zip.h" #include "../crc32.h" #if defined(ASM_CRC) && !defined(USE_ZLIB) && !defined(CRC_TABLE_ONLY) #if !defined(PRE_686) && !defined(__686) # define __686 #endif #ifndef ZCONST # define ZCONST const #endif /* Select wether the following inline-assember code is supported. */ #if (defined(_MSC_VER) && _MSC_VER >= 700) #if (defined(_M_IX86) && _M_IX86 >= 300) # define MSC_INLINE_ASM_32BIT_SUPPORT /* Disable warning for no return value, typical of asm functions */ # pragma warning( disable : 4035 ) #endif #endif #if (defined(__BORLANDC__) && __BORLANDC__ >= 452) # define MSC_INLINE_ASM_32BIT_SUPPORT #endif #ifdef MSC_INLINE_ASM_32BIT_SUPPORT /* This code is intended for Microsoft C/C++ (32-bit) compatible compilers. */ /* * These two (three) macros make up the loop body of the CRC32 cruncher. * registers modified: * eax : crc value "c" * esi : pointer to next data byte (or dword) "buf++" * registers read: * edi : pointer to base of crc_table array * scratch registers: * ebx : index into crc_table array * (requires upper three bytes = 0 when __686 is undefined) */ #ifndef __686 #define Do_CRC { \ __asm { mov bl, al }; \ __asm { shr eax, 8 }; \ __asm { xor eax, [edi+ebx*4] }; } #else /* __686 */ #define Do_CRC { \ __asm { movzx ebx, al }; \ __asm { shr eax, 8 }; \ __asm { xor eax, [edi+ebx*4] }; } #endif /* ?__686 */ #define Do_CRC_byte { \ __asm { xor al, byte ptr [esi] }; \ __asm { inc esi }; \ Do_CRC; } #define Do_CRC_byteof(ofs) { \ __asm { xor al, byte ptr [esi+(ofs)] }; \ Do_CRC; } #ifndef NO_32_BIT_LOADS #ifdef IZ_CRCOPTIM_UNFOLDTBL # define SavLen len /* the edx register is needed elsewhere */ # define UpdCRC_dword { \ __asm { movzx ebx,al }; \ __asm { mov edx,[edi+ebx*4+3072] }; \ __asm { movzx ebx,ah }; \ __asm { shr eax,16 }; \ __asm { xor edx,[edi+ebx*4+2048] }; \ __asm { movzx ebx,al }; \ __asm { shr eax,8 }; \ __asm { xor edx,[edi+ebx*4+1024] }; \ __asm { mov eax,[edi+eax*4] }; \ __asm { xor eax,edx }; } # define UpdCRC_dword_sh(dwPtrIncr) { \ __asm { movzx ebx,al }; \ __asm { mov edx,[edi+ebx*4+3072] }; \ __asm { movzx ebx,ah }; \ __asm { xor edx,[edi+ebx*4+2048] }; \ __asm { shr eax,16 }; \ __asm { movzx ebx,al }; \ __asm { add esi, 4*dwPtrIncr }; \ __asm { shr eax,8 }; \ __asm { xor edx,[edi+ebx*4+1024] }; \ __asm { mov eax,[edi+eax*4] }; \ __asm { xor eax,edx }; } #else /* !IZ_CRCOPTIM_UNFOLDTBL */ # define SavLen edx /* the edx register is free for use here */ # define UpdCRC_dword { \ Do_CRC; \ Do_CRC; \ Do_CRC; \ Do_CRC; } # define UpdCRC_dword_sh(dwPtrIncr) { \ Do_CRC; \ Do_CRC; \ __asm { add esi, 4*(dwPtrIncr) }; \ Do_CRC; \ Do_CRC; } #endif /* ?IZ_CRCOPTIM_UNFOLDTBL */ #define Do_CRC_dword { \ __asm { xor eax, dword ptr [esi] }; \ UpdCRC_dword_sh(1); } #define Do_CRC_4dword { \ __asm { xor eax, dword ptr [esi] }; \ UpdCRC_dword; \ __asm { xor eax, dword ptr [esi+4] }; \ UpdCRC_dword; \ __asm { xor eax, dword ptr [esi+8] }; \ UpdCRC_dword; \ __asm { xor eax, dword ptr [esi+12] }; \ UpdCRC_dword_sh(4); } #endif /* !NO_32_BIT_LOADS */ /* ========================================================================= */ ulg crc32(crc, buf, len) ulg crc; /* crc shift register */ ZCONST uch *buf; /* pointer to bytes to pump through */ extent len; /* number of bytes in buf[] */ /* Run a set of bytes through the crc shift register. If buf is a NULL pointer, then initialize the crc shift register contents instead. Return the current crc in either case. */ { __asm { push edx push ecx mov esi,buf ;/* 2nd arg: uch *buf */ sub eax,eax ;/*> if (!buf) */ test esi,esi ;/*> return 0; */ jz fine ;/*> else { */ call get_crc_table mov edi,eax mov eax,crc ;/* 1st arg: ulg crc */ #ifndef __686 sub ebx,ebx ;/* ebx=0; => bl usable as a dword */ #endif mov ecx,len ;/* 3rd arg: extent len */ not eax ;/*> c = ~crc; */ test ecx,ecx #ifndef NO_UNROLLED_LOOPS jz bail # ifndef NO_32_BIT_LOADS align_loop: test esi,3 ;/* align buf pointer on next */ jz aligned_now ;/* dword boundary */ } Do_CRC_byte ; __asm { dec ecx jnz align_loop aligned_now: # endif /* !NO_32_BIT_LOADS */ mov SavLen,ecx ;/* save current len for later */ shr ecx,4 ;/* ecx = len / 16 */ jz No_Sixteens ; align loop head at start of 486 internal cache line !! align 16 Next_Sixteen: } # ifndef NO_32_BIT_LOADS Do_CRC_4dword ; # else /* NO_32_BIT_LOADS */ Do_CRC_byteof(0) ; Do_CRC_byteof(1) ; Do_CRC_byteof(2) ; Do_CRC_byteof(3) ; Do_CRC_byteof(4) ; Do_CRC_byteof(5) ; Do_CRC_byteof(6) ; Do_CRC_byteof(7) ; Do_CRC_byteof(8) ; Do_CRC_byteof(9) ; Do_CRC_byteof(10) ; Do_CRC_byteof(11) ; Do_CRC_byteof(12) ; Do_CRC_byteof(13) ; Do_CRC_byteof(14) ; Do_CRC_byteof(15) ; __asm { add esi,16 }; # endif /* ?NO_32_BIT_LOADS */ __asm { dec ecx jnz Next_Sixteen No_Sixteens: mov ecx,SavLen and ecx,00000000FH ;/* ecx = len % 16 */ # ifndef NO_32_BIT_LOADS shr ecx,2 jz No_Fours Next_Four: } Do_CRC_dword ; __asm { dec ecx jnz Next_Four No_Fours: mov ecx,SavLen and ecx,000000003H ;/* ecx = len % 4 */ # endif /* !NO_32_BIT_LOADS */ #endif /* !NO_UNROLLED_LOOPS */ jz bail ;/*> if (len) */ ; align loop head at start of 486 internal cache line !! align 16 loupe: ;/*> do { */ } Do_CRC_byte ;/* c = CRC32(c,*buf++,crctab);*/ __asm { dec ecx ;/*> } while (--len); */ jnz loupe bail: ;/*> } */ not eax ;/*> return ~c; */ fine: pop ecx pop edx } #ifdef NEED_RETURN return _EAX; #endif } #endif /* MSC_INLINE_ASM_32BIT_SUPPORT */ #if (defined(_MSC_VER) && _MSC_VER >= 700) #if (defined(_M_IX86) && _M_IX86 >= 300) /* Reenable missing return value warning */ # pragma warning( default : 4035 ) #endif #endif #endif /* ASM_CRC && !USE_ZLIB && !CRC_TABLE_ONLY */