1/* 2 Copyright (c) 1990-2007 Info-ZIP. All rights reserved. 3 4 See the accompanying file LICENSE, version 2000-Apr-09 or later 5 (the contents of which are also included in zip.h) for terms of use. 6 If, for some reason, all these files are missing, the Info-ZIP license 7 also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html 8*/ 9/* crc_i386.c -- Microsoft 32-bit C/C++ adaptation of crc_i386.asm 10 * Created by Rodney Brown from crc_i386.asm, modified by Chr. Spieler. 11 * Last revised: 07-Jan-2007 12 * 13 * Original coded (in crc_i386.asm) and put into the public domain 14 * by Paul Kienitz and Christian Spieler. 15 * 16 * Revised 06-Oct-96, Scott Field (sfield@microsoft.com) 17 * fixed to assemble with masm by not using .model directive which makes 18 * assumptions about segment alignment. Also, 19 * avoid using loop, and j[e]cxz where possible. Use mov + inc, rather 20 * than lodsb, and other misc. changes resulting in the following performance 21 * increases: 22 * 23 * unrolled loops NO_UNROLLED_LOOPS 24 * *8 >8 <8 *8 >8 <8 25 * 26 * +54% +42% +35% +82% +52% +25% 27 * 28 * first item in each table is input buffer length, even multiple of 8 29 * second item in each table is input buffer length, > 8 30 * third item in each table is input buffer length, < 8 31 * 32 * Revised 02-Apr-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au) 33 * Incorporated Rodney Brown's 32-bit-reads optimization as found in the 34 * UNIX AS source crc_i386.S. This new code can be disabled by defining 35 * the macro symbol NO_32_BIT_LOADS. 36 * 37 * Revised 12-Oct-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au) 38 * Incorporated Rodney Brown's additional tweaks for 32-bit-optimized CPUs 39 * (like the Pentium Pro, Pentium II, and probably some Pentium clones). 40 * This optimization is controlled by the macro symbol __686 and is disabled 41 * by default. (This default is based on the assumption that most users 42 * do not yet work on a Pentium Pro or Pentium II machine ...) 43 * 44 * Revised 16-Nov-97, Chr. Spieler: Made code compatible with Borland C++ 45 * 32-bit, removed unneeded kludge for potentially unknown movzx mnemonic, 46 * confirmed correct working with MS VC++ (32-bit). 47 * 48 * Revised 22-May-98, Peter Kunath, Chr. Spieler: The 16-Nov-97 revision broke 49 * MSVC 5.0. Inside preprocessor macros, each instruction is enclosed in its 50 * own __asm {...} construct. For MSVC, a "#pragma warning" was added to 51 * shut up the "no return value" warning message. 52 * 53 * Revised 13-Dec-98, Chr. Spieler: Modified path to "zip.h" header file. 54 * 55 * Revised 16-Jan-2005, Cosmin Truta: Added the ASM_CRC guard, for easier 56 * switching between ASM vs. non-ASM builds, when handling makefiles. 57 * Also enabled the 686 build by default, because there are hardly any 58 * pre-686 CPUs in serious use nowadays. (See the 12-Oct-97 note above.) 59 * 60 * Revised 03-Jan-2006, Chr. Spieler 61 * Enlarged unrolling loops to "do 16 bytes per turn"; optimized access to 62 * data buffer in loop body (adjust pointer only once in loop body and use 63 * offsets to access each item); added additional support for the "unfolded 64 * tables" optimization variant (enabled by IZ_CRCOPTIM_UNFOLDTBL). 65 * 66 * Revised 07-Jan-2007, Chr. Spieler 67 * Recognize additional conditional flag CRC_TABLE_ONLY that prevents 68 * compilation of the crc32() function. 69 * 70 * FLAT memory model assumed. 71 * 72 * Loop unrolling can be disabled by defining the macro NO_UNROLLED_LOOPS. 73 * This results in shorter code at the expense of reduced performance. 74 * 75 */ 76 77#include "../zip.h" 78#include "../crc32.h" 79 80#if defined(ASM_CRC) && !defined(USE_ZLIB) && !defined(CRC_TABLE_ONLY) 81 82#if !defined(PRE_686) && !defined(__686) 83# define __686 84#endif 85 86#ifndef ZCONST 87# define ZCONST const 88#endif 89 90/* Select wether the following inline-assember code is supported. */ 91#if (defined(_MSC_VER) && _MSC_VER >= 700) 92#if (defined(_M_IX86) && _M_IX86 >= 300) 93# define MSC_INLINE_ASM_32BIT_SUPPORT 94 /* Disable warning for no return value, typical of asm functions */ 95# pragma warning( disable : 4035 ) 96#endif 97#endif 98 99#if (defined(__BORLANDC__) && __BORLANDC__ >= 452) 100# define MSC_INLINE_ASM_32BIT_SUPPORT 101#endif 102 103#ifdef MSC_INLINE_ASM_32BIT_SUPPORT 104/* This code is intended for Microsoft C/C++ (32-bit) compatible compilers. */ 105 106/* 107 * These two (three) macros make up the loop body of the CRC32 cruncher. 108 * registers modified: 109 * eax : crc value "c" 110 * esi : pointer to next data byte (or dword) "buf++" 111 * registers read: 112 * edi : pointer to base of crc_table array 113 * scratch registers: 114 * ebx : index into crc_table array 115 * (requires upper three bytes = 0 when __686 is undefined) 116 */ 117#ifndef __686 118#define Do_CRC { \ 119 __asm { mov bl, al }; \ 120 __asm { shr eax, 8 }; \ 121 __asm { xor eax, [edi+ebx*4] }; } 122#else /* __686 */ 123#define Do_CRC { \ 124 __asm { movzx ebx, al }; \ 125 __asm { shr eax, 8 }; \ 126 __asm { xor eax, [edi+ebx*4] }; } 127#endif /* ?__686 */ 128 129#define Do_CRC_byte { \ 130 __asm { xor al, byte ptr [esi] }; \ 131 __asm { inc esi }; \ 132 Do_CRC; } 133 134#define Do_CRC_byteof(ofs) { \ 135 __asm { xor al, byte ptr [esi+(ofs)] }; \ 136 Do_CRC; } 137 138#ifndef NO_32_BIT_LOADS 139#ifdef IZ_CRCOPTIM_UNFOLDTBL 140# define SavLen len /* the edx register is needed elsewhere */ 141# define UpdCRC_dword { \ 142 __asm { movzx ebx,al }; \ 143 __asm { mov edx,[edi+ebx*4+3072] }; \ 144 __asm { movzx ebx,ah }; \ 145 __asm { shr eax,16 }; \ 146 __asm { xor edx,[edi+ebx*4+2048] }; \ 147 __asm { movzx ebx,al }; \ 148 __asm { shr eax,8 }; \ 149 __asm { xor edx,[edi+ebx*4+1024] }; \ 150 __asm { mov eax,[edi+eax*4] }; \ 151 __asm { xor eax,edx }; } 152# define UpdCRC_dword_sh(dwPtrIncr) { \ 153 __asm { movzx ebx,al }; \ 154 __asm { mov edx,[edi+ebx*4+3072] }; \ 155 __asm { movzx ebx,ah }; \ 156 __asm { xor edx,[edi+ebx*4+2048] }; \ 157 __asm { shr eax,16 }; \ 158 __asm { movzx ebx,al }; \ 159 __asm { add esi, 4*dwPtrIncr }; \ 160 __asm { shr eax,8 }; \ 161 __asm { xor edx,[edi+ebx*4+1024] }; \ 162 __asm { mov eax,[edi+eax*4] }; \ 163 __asm { xor eax,edx }; } 164#else /* !IZ_CRCOPTIM_UNFOLDTBL */ 165# define SavLen edx /* the edx register is free for use here */ 166# define UpdCRC_dword { \ 167 Do_CRC; \ 168 Do_CRC; \ 169 Do_CRC; \ 170 Do_CRC; } 171# define UpdCRC_dword_sh(dwPtrIncr) { \ 172 Do_CRC; \ 173 Do_CRC; \ 174 __asm { add esi, 4*(dwPtrIncr) }; \ 175 Do_CRC; \ 176 Do_CRC; } 177#endif /* ?IZ_CRCOPTIM_UNFOLDTBL */ 178 179#define Do_CRC_dword { \ 180 __asm { xor eax, dword ptr [esi] }; \ 181 UpdCRC_dword_sh(1); } 182 183#define Do_CRC_4dword { \ 184 __asm { xor eax, dword ptr [esi] }; \ 185 UpdCRC_dword; \ 186 __asm { xor eax, dword ptr [esi+4] }; \ 187 UpdCRC_dword; \ 188 __asm { xor eax, dword ptr [esi+8] }; \ 189 UpdCRC_dword; \ 190 __asm { xor eax, dword ptr [esi+12] }; \ 191 UpdCRC_dword_sh(4); } 192#endif /* !NO_32_BIT_LOADS */ 193 194/* ========================================================================= */ 195ulg crc32(crc, buf, len) 196 ulg crc; /* crc shift register */ 197 ZCONST uch *buf; /* pointer to bytes to pump through */ 198 extent len; /* number of bytes in buf[] */ 199/* Run a set of bytes through the crc shift register. If buf is a NULL 200 pointer, then initialize the crc shift register contents instead. 201 Return the current crc in either case. */ 202{ 203 __asm { 204 push edx 205 push ecx 206 207 mov esi,buf ;/* 2nd arg: uch *buf */ 208 sub eax,eax ;/*> if (!buf) */ 209 test esi,esi ;/*> return 0; */ 210 jz fine ;/*> else { */ 211 212 call get_crc_table 213 mov edi,eax 214 mov eax,crc ;/* 1st arg: ulg crc */ 215#ifndef __686 216 sub ebx,ebx ;/* ebx=0; => bl usable as a dword */ 217#endif 218 mov ecx,len ;/* 3rd arg: extent len */ 219 not eax ;/*> c = ~crc; */ 220 221 test ecx,ecx 222#ifndef NO_UNROLLED_LOOPS 223 jz bail 224# ifndef NO_32_BIT_LOADS 225align_loop: 226 test esi,3 ;/* align buf pointer on next */ 227 jz aligned_now ;/* dword boundary */ 228 } 229 Do_CRC_byte ; 230 __asm { 231 dec ecx 232 jnz align_loop 233aligned_now: 234# endif /* !NO_32_BIT_LOADS */ 235 mov SavLen,ecx ;/* save current len for later */ 236 shr ecx,4 ;/* ecx = len / 16 */ 237 jz No_Sixteens 238; align loop head at start of 486 internal cache line !! 239 align 16 240Next_Sixteen: 241 } 242# ifndef NO_32_BIT_LOADS 243 Do_CRC_4dword ; 244# else /* NO_32_BIT_LOADS */ 245 Do_CRC_byteof(0) ; 246 Do_CRC_byteof(1) ; 247 Do_CRC_byteof(2) ; 248 Do_CRC_byteof(3) ; 249 Do_CRC_byteof(4) ; 250 Do_CRC_byteof(5) ; 251 Do_CRC_byteof(6) ; 252 Do_CRC_byteof(7) ; 253 Do_CRC_byteof(8) ; 254 Do_CRC_byteof(9) ; 255 Do_CRC_byteof(10) ; 256 Do_CRC_byteof(11) ; 257 Do_CRC_byteof(12) ; 258 Do_CRC_byteof(13) ; 259 Do_CRC_byteof(14) ; 260 Do_CRC_byteof(15) ; 261 __asm { add esi,16 }; 262# endif /* ?NO_32_BIT_LOADS */ 263 __asm { 264 dec ecx 265 jnz Next_Sixteen 266No_Sixteens: 267 mov ecx,SavLen 268 and ecx,00000000FH ;/* ecx = len % 16 */ 269# ifndef NO_32_BIT_LOADS 270 shr ecx,2 271 jz No_Fours 272Next_Four: 273 } 274 Do_CRC_dword ; 275 __asm { 276 dec ecx 277 jnz Next_Four 278No_Fours: 279 mov ecx,SavLen 280 and ecx,000000003H ;/* ecx = len % 4 */ 281# endif /* !NO_32_BIT_LOADS */ 282#endif /* !NO_UNROLLED_LOOPS */ 283 jz bail ;/*> if (len) */ 284; align loop head at start of 486 internal cache line !! 285 align 16 286loupe: ;/*> do { */ 287 } 288 Do_CRC_byte ;/* c = CRC32(c,*buf++,crctab);*/ 289 __asm { 290 dec ecx ;/*> } while (--len); */ 291 jnz loupe 292 293bail: ;/*> } */ 294 not eax ;/*> return ~c; */ 295fine: 296 pop ecx 297 pop edx 298 } 299#ifdef NEED_RETURN 300 return _EAX; 301#endif 302} 303#endif /* MSC_INLINE_ASM_32BIT_SUPPORT */ 304#if (defined(_MSC_VER) && _MSC_VER >= 700) 305#if (defined(_M_IX86) && _M_IX86 >= 300) 306 /* Reenable missing return value warning */ 307# pragma warning( default : 4035 ) 308#endif 309#endif 310#endif /* ASM_CRC && !USE_ZLIB && !CRC_TABLE_ONLY */ 311