/*
  Copyright (c) 1990-2007 Info-ZIP.  All rights reserved.

  See the accompanying file LICENSE, version 2000-Apr-09 or later
  (the contents of which are also included in zip.h) for terms of use.
  If, for some reason, all these files are missing, the Info-ZIP license
  also may be found at:  ftp://ftp.info-zip.org/pub/infozip/license.html
*/
/*
 * crc_i386.S, optimized CRC calculation function for Zip and UnZip,
 * created by Paul Kienitz and Christian Spieler.  Last revised 07 Jan 2007.
 *
 * GRR 961110:  incorporated Scott Field optimizations from win32/crc_i386.asm
 *              => overall 6% speedup in "unzip -tq" on 9MB zipfile (486-66)
 *
 * SPC 970402:  revised for Rodney Brown's optimizations (32-bit-wide
 *              aligned reads for most of the data from buffer), can be
 *              disabled by defining the macro NO_32_BIT_LOADS
 *
 * SPC 971012:  added Rodney Brown's additional tweaks for 32-bit-optimized
 *              CPUs (like the Pentium Pro, Pentium II, and probably some
 *              Pentium clones). This optimization is controlled by the
 *              preprocessor switch "__686" and is disabled by default.
 *              (This default is based on the assumption that most users
 *              do not yet work on a Pentium Pro or Pentium II machine ...)
 *
 * COS 050116:  Enabled the 686 build by default, because there are hardly any
 *              pre-686 CPUs in serious use nowadays. (See SPC 971012 above.)
 *              Define PRE_686 to get the 386/486/Pentium code instead.
 *
 * SPC 060103:  Updated code to incorporate newer optimizations found in zlib.
 *
 * SPC 070107:  Added conditional switch to deactivate crc32() compilation.
 *
 * FLAT memory model assumed.  Calling interface:
 *   - args are pushed onto the stack from right to left,
 *   - return value is given in the EAX register,
 *   - all other registers (with exception of EFLAGS) are preserved. (With
 *     GNU C 2.7.x, %edx and %ecx are `scratch' registers, but preserving
 *     them nevertheless adds only 4 single byte instructions.)
 *
 * This source generates the function
 * ulg crc32(ulg crc, ZCONST uch *buf, extent len).
 *
 * Loop unrolling can be disabled by defining the macro NO_UNROLLED_LOOPS.
 * This results in shorter code at the expense of reduced performance.
 */

/* This file is NOT used in conjunction with zlib, or when only creation of
 * the basic CRC_32_Table (for other purpose) is requested.
 */
#if !defined(USE_ZLIB) && !defined(CRC_TABLE_ONLY)

/* Preprocess with -DNO_UNDERLINE if your C compiler does not prefix
 * external symbols with an underline character '_'.
 */
#if defined(NO_UNDERLINE) || defined(__ELF__)
#  define _crc32            crc32
#  define _get_crc_table    get_crc_table
#endif
/* Use 16-byte alignment if your assembler supports it. Warning: gas
 * uses a log(x) parameter (.align 4 means 16-byte alignment). On SVR4
 * the parameter is a number of bytes.
 */
#ifndef ALIGNMENT
#  define ALIGNMENT .align 4,0x90
#endif

#if defined(i386) || defined(_i386) || defined(_I386) || defined(__i386)

/* This version is for 386 Unix, OS/2, MSDOS in 32 bit mode (gcc & gas).
 * Warning: it uses the AT&T syntax: mov source,dest
 * This file is only optional. If you want to use the C version,
 * remove -DASM_CRC from CFLAGS in Makefile and set OBJA to an empty string.
 */

                .file   "crc_i386.S"

#if !defined(PRE_686) && !defined(__686)
   /* Optimize for Pentium Pro and compatible CPUs by default. */
#  define __686
#endif

#ifdef NO_STD_STACKFRAME
   /* Frameless entry was requested explicitly; this request overrides
    * any USE_STD_STACKFRAME setting, so that the %esp-relative argument
    * offsets below get selected.
    */
#  undef USE_STD_STACKFRAME
#else
   /* The default is to use standard stack frame entry, because it
    * results in smaller code!
    */
#  ifndef USE_STD_STACKFRAME
#    define USE_STD_STACKFRAME
#  endif
#endif

/* Argument access.  With the standard frame, the arguments are addressed
 * relative to %ebp.  Without it, they are addressed relative to %esp AFTER
 * the five register pushes at function entry (5*4 bytes saved registers
 * + 4 bytes return address = 24 bytes below the first argument).
 */
#ifdef USE_STD_STACKFRAME
#  define _STD_ENTRY    pushl   %ebp ; movl   %esp,%ebp
#  define arg1  8(%ebp)
#  define arg2  12(%ebp)
#  define arg3  16(%ebp)
#  define _STD_LEAVE    popl    %ebp
#else /* !USE_STD_STACKFRAME */
#  define _STD_ENTRY
#  define arg1  24(%esp)
#  define arg2  28(%esp)
#  define arg3  32(%esp)
#  define _STD_LEAVE
#endif /* ?USE_STD_STACKFRAME */

/*
 * These two (three) macros make up the loop body of the CRC32 cruncher.
 * registers modified:
 *   eax  : crc value "c"
 *   esi  : pointer to next data byte (or lword) "buf++"
 * registers read:
 *   edi  : pointer to base of crc_table array
 * scratch registers:
 *   ebx  : index into crc_table array
 *          (requires upper three bytes = 0 when __686 is undefined)
 */
#ifndef __686 /* optimize for 386, 486, Pentium */
#define Do_CRC          /* c = (c >> 8) ^ table[c & 0xFF] */\
                movb    %al, %bl           ;/* tmp = c & 0xFF  */\
                shrl    $8, %eax           ;/* c = (c >> 8)    */\
                xorl    (%edi, %ebx, 4), %eax ;/* c ^= table[tmp] */
#else /* __686 : optimize for Pentium Pro and compatible CPUs */
#define Do_CRC          /* c = (c >> 8) ^ table[c & 0xFF] */\
                movzbl  %al, %ebx          ;/* tmp = c & 0xFF  */\
                shrl    $8, %eax           ;/* c = (c >> 8)    */\
                xorl    (%edi, %ebx, 4), %eax ;/* c ^= table[tmp] */
#endif /* ?__686 */

#define Do_CRC_byte     /* c = (c >> 8) ^ table[(c^*buf++)&0xFF] */\
                xorb    (%esi), %al        ;/* c ^= *buf  */\
                incl    %esi               ;/* buf++      */\
                Do_CRC

/* Do_CRC_byteof() processes the byte at a fixed offset from buf and must
 * NOT advance buf itself: the unrolled loop passes the offsets 0..15 and
 * then adds 16 to buf once.  (Incrementing here as well would skip every
 * other byte and run past the end of the buffer.)
 */
#define Do_CRC_byteof(ofs) /* c = (c >> 8) ^ table[(c^buf[ofs])&0xFF] */\
                xorb    ofs(%esi), %al     ;/* c ^= buf[ofs]  */\
                Do_CRC

#ifndef NO_32_BIT_LOADS
# ifdef IZ_CRCOPTIM_UNFOLDTBL
   /* the edx register is needed in crc calculation */
#  define SavLen arg3
   /* Four-table update: the sub-tables are 1024 bytes (256 lwords) apart. */
#  define UpdCRC_lword \
                movzbl  %al, %ebx                ; \
                movl    3072(%edi,%ebx,4), %edx  ; \
                movzbl  %ah, %ebx                ; \
                shrl    $16, %eax                ; \
                xorl    2048(%edi,%ebx,4), %edx  ; \
                movzbl  %al, %ebx                ; \
                shrl    $8,%eax                  ; \
                xorl    1024(%edi,%ebx,4), %edx  ; \
                movl    (%edi,%eax,4), %eax      ; \
                xorl    %edx,%eax                ;
#  define UpdCRC_lword_sh(dwPtrIncr) \
                movzbl  %al, %ebx                ; \
                movl    3072(%edi,%ebx,4), %edx  ; \
                movzbl  %ah, %ebx                ; \
                shrl    $16, %eax                ; \
                xorl    2048(%edi,%ebx,4), %edx  ; \
                movzbl  %al, %ebx                ; \
                addl    $4*(dwPtrIncr), %esi     ;/* ((ulg *)buf)+=dwPtrIncr */\
                shrl    $8,%eax                  ; \
                xorl    1024(%edi,%ebx,4), %edx  ; \
                movl    (%edi,%eax,4),%eax       ; \
                xorl    %edx,%eax                ;
# else /* !IZ_CRCOPTIM_UNFOLDTBL */
   /* the edx register is not needed anywhere else */
#  define SavLen %edx
#  define UpdCRC_lword \
                Do_CRC \
                Do_CRC \
                Do_CRC \
                Do_CRC
#  define UpdCRC_lword_sh(dwPtrIncr) \
                Do_CRC \
                Do_CRC \
                addl    $4*(dwPtrIncr), %esi     ;/* ((ulg *)buf)+=dwPtrIncr */\
                Do_CRC \
                Do_CRC
# endif /* ?IZ_CRCOPTIM_UNFOLDTBL */
#define Do_CRC_lword \
                xorl    (%esi), %eax       ;/* c ^= *(ulg *)buf */\
                UpdCRC_lword_sh(1)          /* ... ((ulg *)buf)++ */
#define Do_CRC_4lword \
                xorl    (%esi), %eax       ;/* c ^= *(ulg *)buf */\
                UpdCRC_lword \
                xorl    4(%esi), %eax      ;/* c ^= *((ulg *)buf+1) */\
                UpdCRC_lword \
                xorl    8(%esi), %eax      ;/* c ^= *((ulg *)buf+2) */\
                UpdCRC_lword \
                xorl    12(%esi), %eax     ;/* c ^= *((ulg *)buf+3) */\
                UpdCRC_lword_sh(4)          /* ... ((ulg *)buf)+=4 */
#else /* NO_32_BIT_LOADS */
   /* Bytewise processing does not use edx for the crc calculation, so the
    * unrolled loop can keep the saved length there.  (SavLen is referenced
    * by the unrolled code regardless of NO_32_BIT_LOADS.)
    */
#  define SavLen %edx
#endif /* ?NO_32_BIT_LOADS */


                .text

                .globl  _crc32

/*
 * ulg crc32(ulg crc, ZCONST uch *buf, extent len)
 *
 * In:    arg1 = crc (running CRC value), arg2 = buf, arg3 = len,
 *        all passed on the stack.
 * Out:   eax = 0 when buf is NULL, otherwise the updated CRC value.
 * Calls: get_crc_table(), whose return value (in eax) is used as the
 *        base address of the CRC table.
 * Saved: edi, esi, ebx, edx, ecx (and ebp with the standard stack frame)
 *        are pushed on entry and restored before return.
 *
 * Register roles inside the function:
 *   eax : running crc value "c" (kept inverted between entry and exit)
 *   esi : read pointer into buf
 *   edi : base of the CRC table
 *   ecx : loop counter
 *   ebx : table index scratch
 *   edx : saved length (SavLen) or scratch, depending on configuration
 */
_crc32:                         /* ulg crc32(ulg crc, uch *buf, extent len) */
                _STD_ENTRY
                pushl   %edi
                pushl   %esi
                pushl   %ebx
                pushl   %edx
                pushl   %ecx

                movl    arg2, %esi           /* 2nd arg: uch *buf            */
                subl    %eax, %eax           /* > if (!buf)                  */
                testl   %esi, %esi           /* >   return 0;                */
                jz      .L_fine              /* > else {                     */
                call    _get_crc_table
                movl    %eax, %edi           /* edi = crc table base         */
                movl    arg1, %eax           /* 1st arg: ulg crc             */
#ifndef __686
                subl    %ebx, %ebx           /* ebx=0; bl usable as dword    */
#endif
                movl    arg3, %ecx           /* 3rd arg: extent len          */
                notl    %eax                 /* >   c = ~crc;                */

                testl   %ecx, %ecx
#ifndef  NO_UNROLLED_LOOPS
                jz      .L_bail
#  ifndef  NO_32_BIT_LOADS
                /* Assert now have positive length */
.L_align_loop:
                testl   $3, %esi        /* Align buf on lword boundary */
                jz      .L_aligned_now
                Do_CRC_byte
                decl    %ecx
                jnz     .L_align_loop
.L_aligned_now:
#  endif  /* !NO_32_BIT_LOADS */
                movl    %ecx, SavLen         /* save current value of len */
                shrl    $4, %ecx             /* ecx = len / 16    */
                jz      .L_No_Sixteens
/*  align loop head at start of 486 internal cache line !! */
                ALIGNMENT
.L_Next_Sixteen:
#  ifndef NO_32_BIT_LOADS
                 Do_CRC_4lword
#  else   /* NO_32_BIT_LOADS */
                 Do_CRC_byteof(0)
                 Do_CRC_byteof(1)
                 Do_CRC_byteof(2)
                 Do_CRC_byteof(3)
                 Do_CRC_byteof(4)
                 Do_CRC_byteof(5)
                 Do_CRC_byteof(6)
                 Do_CRC_byteof(7)
                 Do_CRC_byteof(8)
                 Do_CRC_byteof(9)
                 Do_CRC_byteof(10)
                 Do_CRC_byteof(11)
                 Do_CRC_byteof(12)
                 Do_CRC_byteof(13)
                 Do_CRC_byteof(14)
                 Do_CRC_byteof(15)
                 addl    $16,%esi        ;/* buf += 16 */
#  endif  /* ?NO_32_BIT_LOADS */
                decl    %ecx
                jnz     .L_Next_Sixteen

.L_No_Sixteens:
                movl    SavLen, %ecx
                andl    $15, %ecx         /* ecx = len % 16   */
#  ifndef NO_32_BIT_LOADS
                shrl    $2,%ecx           /* ecx = (len % 16) / 4 */
                jz      .L_No_Fours
.L_Next_Four:
                Do_CRC_lword
                decl    %ecx
                jnz     .L_Next_Four
.L_No_Fours:
                movl    SavLen,%ecx
                andl    $3,%ecx          /* ecx = len % 4 */
#  endif /* !NO_32_BIT_LOADS */
#endif /* !NO_UNROLLED_LOOPS */
                /* ZF reflects the remaining byte count in ecx here (set by
                 * the preceding testl resp. andl). */
                jz      .L_bail              /* > if (len)                   */
/* align loop head at start of 486 internal cache line !! */
                ALIGNMENT
.L_loupe:                                    /* >   do {                     */
                 Do_CRC_byte                 /*       c = CRC32(c,*buf++,crctab);*/
                decl    %ecx                 /* >   } while (--len);         */
                jnz     .L_loupe

.L_bail:                                     /* > }                          */
                notl    %eax                 /* > return ~c;                 */
.L_fine:
                popl    %ecx
                popl    %edx
                popl    %ebx
                popl    %esi
                popl    %edi
                _STD_LEAVE
                ret

#else
#  error this asm version is for 386 only
#endif /* i386 || _i386 || _I386 || __i386 */

#endif /* !USE_ZLIB && !CRC_TABLE_ONLY */