1/* crc32.c -- compute the CRC-32 of a data stream
2 * Copyright (C) 1995-2022 Mark Adler
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 *
5 * This interleaved implementation of a CRC makes use of pipelined multiple
6 * arithmetic-logic units, commonly found in modern CPU cores. It is due to
7 * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution.
8 */
9
10/* @(#) $Id$ */
11
12/*
13  Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
14  protection on the static variables used to control the first-use generation
15  of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
16  first call get_crc_table() to initialize the tables before allowing more than
17  one thread to use crc32().
18
19  MAKECRCH can be #defined to write out crc32.h. A main() routine is also
20  produced, so that this one source file can be compiled to an executable.
21 */
22
23#ifdef MAKECRCH
24#  include <stdio.h>
25#  ifndef DYNAMIC_CRC_TABLE
26#    define DYNAMIC_CRC_TABLE
27#  endif /* !DYNAMIC_CRC_TABLE */
28#endif /* MAKECRCH */
29
30#include "zutil.h"      /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
31
32 /*
33  A CRC of a message is computed on N braids of words in the message, where
34  each word consists of W bytes (4 or 8). If N is 3, for example, then three
35  running sparse CRCs are calculated respectively on each braid, at these
36  indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
37  This is done starting at a word boundary, and continues until as many blocks
38  of N * W bytes as are available have been processed. The results are combined
39  into a single CRC at the end. For this code, N must be in the range 1..6 and
40  W must be 4 or 8. The upper limit on N can be increased if desired by adding
41  more #if blocks, extending the patterns apparent in the code. In addition,
42  crc32.h would need to be regenerated, if the maximum N value is increased.
43
44  N and W are chosen empirically by benchmarking the execution time on a given
45  processor. The choices for N and W below were based on testing on Intel Kaby
46  Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
47  Octeon II processors. The Intel, AMD, and ARM processors were all fastest
48  with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
49  They were all tested with either gcc or clang, all using the -O3 optimization
50  level. Your mileage may vary.
51 */
52
53/* Define N */
54#ifdef Z_TESTN
55#  define N Z_TESTN
56#else
57#  define N 5
58#endif
59#if N < 1 || N > 6
60#  error N must be in 1..6
61#endif
62
63/*
64  z_crc_t must be at least 32 bits. z_word_t must be at least as long as
65  z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and
66  that bytes are eight bits.
67 */
68
69/*
70  Define W and the associated z_word_t type. If W is not defined, then a
71  braided calculation is not used, and the associated tables and code are not
72  compiled.
73 */
74#ifdef Z_TESTW
75#  if Z_TESTW-1 != -1
76#    define W Z_TESTW
77#  endif
78#else
79#  ifdef MAKECRCH
80#    define W 8         /* required for MAKECRCH */
81#  else
82#    if defined(__x86_64__) || defined(__aarch64__)
83#      define W 8
84#    else
85#      define W 4
86#    endif
87#  endif
88#endif
89#ifdef W
90#  if W == 8 && defined(Z_U8)
91     typedef Z_U8 z_word_t;
92#  elif defined(Z_U4)
93#    undef W
94#    define W 4
95     typedef Z_U4 z_word_t;
96#  else
97#    undef W
98#  endif
99#endif
100
101/* If available, use the ARM processor CRC32 instruction. */
102#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8
103#  define ARMCRC32
104#endif
105
106#if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
107/*
108  Swap the bytes in a z_word_t to convert between little and big endian. Any
109  self-respecting compiler will optimize this to a single machine byte-swap
110  instruction, if one is available. This assumes that word_t is either 32 bits
111  or 64 bits.
112 */
113local z_word_t byte_swap(z_word_t word) {
114#  if W == 8
115    return
116        (word & 0xff00000000000000) >> 56 |
117        (word & 0xff000000000000) >> 40 |
118        (word & 0xff0000000000) >> 24 |
119        (word & 0xff00000000) >> 8 |
120        (word & 0xff000000) << 8 |
121        (word & 0xff0000) << 24 |
122        (word & 0xff00) << 40 |
123        (word & 0xff) << 56;
124#  else   /* W == 4 */
125    return
126        (word & 0xff000000) >> 24 |
127        (word & 0xff0000) >> 8 |
128        (word & 0xff00) << 8 |
129        (word & 0xff) << 24;
130#  endif
131}
132#endif
133
134#ifdef DYNAMIC_CRC_TABLE
135/* =========================================================================
136 * Table of powers of x for combining CRC-32s, filled in by make_crc_table()
137 * below.
138 */
139   local z_crc_t FAR x2n_table[32];
140#else
141/* =========================================================================
142 * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
143 * of x for combining CRC-32s, all made by make_crc_table().
144 */
145#  include "crc32.h"
146#endif
147
148/* CRC polynomial. */
149#define POLY 0xedb88320         /* p(x) reflected, with x^32 implied */
150
151/*
152  Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
153  reflected. For speed, this requires that a not be zero.
154 */
155local z_crc_t multmodp(z_crc_t a, z_crc_t b) {
156    z_crc_t m, p;
157
158    m = (z_crc_t)1 << 31;
159    p = 0;
160    for (;;) {
161        if (a & m) {
162            p ^= b;
163            if ((a & (m - 1)) == 0)
164                break;
165        }
166        m >>= 1;
167        b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
168    }
169    return p;
170}
171
172/*
173  Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
174  initialized.
175 */
176local z_crc_t x2nmodp(z_off64_t n, unsigned k) {
177    z_crc_t p;
178
179    p = (z_crc_t)1 << 31;           /* x^0 == 1 */
180    while (n) {
181        if (n & 1)
182            p = multmodp(x2n_table[k & 31], p);
183        n >>= 1;
184        k++;
185    }
186    return p;
187}
188
189#ifdef DYNAMIC_CRC_TABLE
190/* =========================================================================
191 * Build the tables for byte-wise and braided CRC-32 calculations, and a table
192 * of powers of x for combining CRC-32s.
193 */
194local z_crc_t FAR crc_table[256];
195#ifdef W
196   local z_word_t FAR crc_big_table[256];
197   local z_crc_t FAR crc_braid_table[W][256];
198   local z_word_t FAR crc_braid_big_table[W][256];
199   local void braid(z_crc_t [][256], z_word_t [][256], int, int);
200#endif
201#ifdef MAKECRCH
202   local void write_table(FILE *, const z_crc_t FAR *, int);
203   local void write_table32hi(FILE *, const z_word_t FAR *, int);
204   local void write_table64(FILE *, const z_word_t FAR *, int);
205#endif /* MAKECRCH */
206
207/*
208  Define a once() function depending on the availability of atomics. If this is
209  compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in
210  multiple threads, and if atomics are not available, then get_crc_table() must
211  be called to initialize the tables and must return before any threads are
212  allowed to compute or combine CRCs.
213 */
214
215/* Definition of once functionality. */
216typedef struct once_s once_t;
217
218/* Check for the availability of atomics. */
219#if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \
220    !defined(__STDC_NO_ATOMICS__)
221
222#include <stdatomic.h>
223
224/* Structure for once(), which must be initialized with ONCE_INIT. */
225struct once_s {
226    atomic_flag begun;
227    atomic_int done;
228};
229#define ONCE_INIT {ATOMIC_FLAG_INIT, 0}
230
231/*
232  Run the provided init() function exactly once, even if multiple threads
233  invoke once() at the same time. The state must be a once_t initialized with
234  ONCE_INIT.
235 */
236local void once(once_t *state, void (*init)(void)) {
237    if (!atomic_load(&state->done)) {
238        if (atomic_flag_test_and_set(&state->begun))
239            while (!atomic_load(&state->done))
240                ;
241        else {
242            init();
243            atomic_store(&state->done, 1);
244        }
245    }
246}
247
248#else   /* no atomics */
249
250/* Structure for once(), which must be initialized with ONCE_INIT. */
251struct once_s {
252    volatile int begun;
253    volatile int done;
254};
255#define ONCE_INIT {0, 0}
256
257/* Test and set. Alas, not atomic, but tries to minimize the period of
258   vulnerability. */
259local int test_and_set(int volatile *flag) {
260    int was;
261
262    was = *flag;
263    *flag = 1;
264    return was;
265}
266
267/* Run the provided init() function once. This is not thread-safe. */
268local void once(once_t *state, void (*init)(void)) {
269    if (!state->done) {
270        if (test_and_set(&state->begun))
271            while (!state->done)
272                ;
273        else {
274            init();
275            state->done = 1;
276        }
277    }
278}
279
280#endif
281
282/* State for once(). */
283local once_t made = ONCE_INIT;
284
285/*
286  Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
287  x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
288
289  Polynomials over GF(2) are represented in binary, one bit per coefficient,
290  with the lowest powers in the most significant bit. Then adding polynomials
291  is just exclusive-or, and multiplying a polynomial by x is a right shift by
292  one. If we call the above polynomial p, and represent a byte as the
293  polynomial q, also with the lowest power in the most significant bit (so the
294  byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p,
295  where a mod b means the remainder after dividing a by b.
296
297  This calculation is done using the shift-register method of multiplying and
298  taking the remainder. The register is initialized to zero, and for each
299  incoming bit, x^32 is added mod p to the register if the bit is a one (where
300  x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x
301  (which is shifting right by one and adding x^32 mod p if the bit shifted out
302  is a one). We start with the highest power (least significant bit) of q and
303  repeat for all eight bits of q.
304
305  The table is simply the CRC of all possible eight bit values. This is all the
306  information needed to generate CRCs on data a byte at a time for all
307  combinations of CRC register values and incoming bytes.
308 */
309
310local void make_crc_table(void) {
311    unsigned i, j, n;
312    z_crc_t p;
313
314    /* initialize the CRC of bytes tables */
315    for (i = 0; i < 256; i++) {
316        p = i;
317        for (j = 0; j < 8; j++)
318            p = p & 1 ? (p >> 1) ^ POLY : p >> 1;
319        crc_table[i] = p;
320#ifdef W
321        crc_big_table[i] = byte_swap(p);
322#endif
323    }
324
325    /* initialize the x^2^n mod p(x) table */
326    p = (z_crc_t)1 << 30;         /* x^1 */
327    x2n_table[0] = p;
328    for (n = 1; n < 32; n++)
329        x2n_table[n] = p = multmodp(p, p);
330
331#ifdef W
332    /* initialize the braiding tables -- needs x2n_table[] */
333    braid(crc_braid_table, crc_braid_big_table, N, W);
334#endif
335
336#ifdef MAKECRCH
337    {
338        /*
339          The crc32.h header file contains tables for both 32-bit and 64-bit
340          z_word_t's, and so requires a 64-bit type be available. In that case,
341          z_word_t must be defined to be 64-bits. This code then also generates
342          and writes out the tables for the case that z_word_t is 32 bits.
343         */
344#if !defined(W) || W != 8
345#  error Need a 64-bit integer type in order to generate crc32.h.
346#endif
347        FILE *out;
348        int k, n;
349        z_crc_t ltl[8][256];
350        z_word_t big[8][256];
351
352        out = fopen("crc32.h", "w");
353        if (out == NULL) return;
354
355        /* write out little-endian CRC table to crc32.h */
356        fprintf(out,
357            "/* crc32.h -- tables for rapid CRC calculation\n"
358            " * Generated automatically by crc32.c\n */\n"
359            "\n"
360            "local const z_crc_t FAR crc_table[] = {\n"
361            "    ");
362        write_table(out, crc_table, 256);
363        fprintf(out,
364            "};\n");
365
366        /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */
367        fprintf(out,
368            "\n"
369            "#ifdef W\n"
370            "\n"
371            "#if W == 8\n"
372            "\n"
373            "local const z_word_t FAR crc_big_table[] = {\n"
374            "    ");
375        write_table64(out, crc_big_table, 256);
376        fprintf(out,
377            "};\n");
378
379        /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */
380        fprintf(out,
381            "\n"
382            "#else /* W == 4 */\n"
383            "\n"
384            "local const z_word_t FAR crc_big_table[] = {\n"
385            "    ");
386        write_table32hi(out, crc_big_table, 256);
387        fprintf(out,
388            "};\n"
389            "\n"
390            "#endif\n");
391
392        /* write out braid tables for each value of N */
393        for (n = 1; n <= 6; n++) {
394            fprintf(out,
395            "\n"
396            "#if N == %d\n", n);
397
398            /* compute braid tables for this N and 64-bit word_t */
399            braid(ltl, big, n, 8);
400
401            /* write out braid tables for 64-bit z_word_t to crc32.h */
402            fprintf(out,
403            "\n"
404            "#if W == 8\n"
405            "\n"
406            "local const z_crc_t FAR crc_braid_table[][256] = {\n");
407            for (k = 0; k < 8; k++) {
408                fprintf(out, "   {");
409                write_table(out, ltl[k], 256);
410                fprintf(out, "}%s", k < 7 ? ",\n" : "");
411            }
412            fprintf(out,
413            "};\n"
414            "\n"
415            "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
416            for (k = 0; k < 8; k++) {
417                fprintf(out, "   {");
418                write_table64(out, big[k], 256);
419                fprintf(out, "}%s", k < 7 ? ",\n" : "");
420            }
421            fprintf(out,
422            "};\n");
423
424            /* compute braid tables for this N and 32-bit word_t */
425            braid(ltl, big, n, 4);
426
427            /* write out braid tables for 32-bit z_word_t to crc32.h */
428            fprintf(out,
429            "\n"
430            "#else /* W == 4 */\n"
431            "\n"
432            "local const z_crc_t FAR crc_braid_table[][256] = {\n");
433            for (k = 0; k < 4; k++) {
434                fprintf(out, "   {");
435                write_table(out, ltl[k], 256);
436                fprintf(out, "}%s", k < 3 ? ",\n" : "");
437            }
438            fprintf(out,
439            "};\n"
440            "\n"
441            "local const z_word_t FAR crc_braid_big_table[][256] = {\n");
442            for (k = 0; k < 4; k++) {
443                fprintf(out, "   {");
444                write_table32hi(out, big[k], 256);
445                fprintf(out, "}%s", k < 3 ? ",\n" : "");
446            }
447            fprintf(out,
448            "};\n"
449            "\n"
450            "#endif\n"
451            "\n"
452            "#endif\n");
453        }
454        fprintf(out,
455            "\n"
456            "#endif\n");
457
458        /* write out zeros operator table to crc32.h */
459        fprintf(out,
460            "\n"
461            "local const z_crc_t FAR x2n_table[] = {\n"
462            "    ");
463        write_table(out, x2n_table, 32);
464        fprintf(out,
465            "};\n");
466        fclose(out);
467    }
468#endif /* MAKECRCH */
469}
470
471#ifdef MAKECRCH
472
473/*
474   Write the 32-bit values in table[0..k-1] to out, five per line in
475   hexadecimal separated by commas.
476 */
477local void write_table(FILE *out, const z_crc_t FAR *table, int k) {
478    int n;
479
480    for (n = 0; n < k; n++)
481        fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
482                (unsigned long)(table[n]),
483                n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
484}
485
486/*
487   Write the high 32-bits of each value in table[0..k-1] to out, five per line
488   in hexadecimal separated by commas.
489 */
490local void write_table32hi(FILE *out, const z_word_t FAR *table, int k) {
491    int n;
492
493    for (n = 0; n < k; n++)
494        fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : "    ",
495                (unsigned long)(table[n] >> 32),
496                n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", "));
497}
498
499/*
500  Write the 64-bit values in table[0..k-1] to out, three per line in
501  hexadecimal separated by commas. This assumes that if there is a 64-bit
502  type, then there is also a long long integer type, and it is at least 64
503  bits. If not, then the type cast and format string can be adjusted
504  accordingly.
505 */
506local void write_table64(FILE *out, const z_word_t FAR *table, int k) {
507    int n;
508
509    for (n = 0; n < k; n++)
510        fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : "    ",
511                (unsigned long long)(table[n]),
512                n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", "));
513}
514
515/* Actually do the deed. */
516int main(void) {
517    make_crc_table();
518    return 0;
519}
520
521#endif /* MAKECRCH */
522
523#ifdef W
524/*
525  Generate the little and big-endian braid tables for the given n and z_word_t
526  size w. Each array must have room for w blocks of 256 elements.
527 */
528local void braid(z_crc_t ltl[][256], z_word_t big[][256], int n, int w) {
529    int k;
530    z_crc_t i, p, q;
531    for (k = 0; k < w; k++) {
532        p = x2nmodp((n * w + 3 - k) << 3, 0);
533        ltl[k][0] = 0;
534        big[w - 1 - k][0] = 0;
535        for (i = 1; i < 256; i++) {
536            ltl[k][i] = q = multmodp(i << 24, p);
537            big[w - 1 - k][i] = byte_swap(q);
538        }
539    }
540}
541#endif
542
543#endif /* DYNAMIC_CRC_TABLE */
544
545/* =========================================================================
546 * This function can be used by asm versions of crc32(), and to force the
547 * generation of the CRC tables in a threaded application.
548 */
549const z_crc_t FAR * ZEXPORT get_crc_table(void) {
550#ifdef DYNAMIC_CRC_TABLE
551    once(&made, make_crc_table);
552#endif /* DYNAMIC_CRC_TABLE */
553    return (const z_crc_t FAR *)crc_table;
554}
555
556/* =========================================================================
557 * Use ARM machine instructions if available. This will compute the CRC about
558 * ten times faster than the braided calculation. This code does not check for
559 * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will
560 * only be defined if the compilation specifies an ARM processor architecture
561 * that has the instructions. For example, compiling with -march=armv8.1-a or
562 * -march=armv8-a+crc, or -march=native if the compile machine has the crc32
563 * instructions.
564 */
565#ifdef ARMCRC32
566
567/*
568   Constants empirically determined to maximize speed. These values are from
569   measurements on a Cortex-A57. Your mileage may vary.
570 */
571#define Z_BATCH 3990                /* number of words in a batch */
572#define Z_BATCH_ZEROS 0xa10d3d0c    /* computed from Z_BATCH = 3990 */
573#define Z_BATCH_MIN 800             /* fewest words in a final batch */
574
575unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf,
576                              z_size_t len) {
577    z_crc_t val;
578    z_word_t crc1, crc2;
579    const z_word_t *word;
580    z_word_t val0, val1, val2;
581    z_size_t last, last2, i;
582    z_size_t num;
583
584    /* Return initial CRC, if requested. */
585    if (buf == Z_NULL) return 0;
586
587#ifdef DYNAMIC_CRC_TABLE
588    once(&made, make_crc_table);
589#endif /* DYNAMIC_CRC_TABLE */
590
591    /* Pre-condition the CRC */
592    crc = (~crc) & 0xffffffff;
593
594    /* Compute the CRC up to a word boundary. */
595    while (len && ((z_size_t)buf & 7) != 0) {
596        len--;
597        val = *buf++;
598        __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
599    }
600
601    /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */
602    word = (z_word_t const *)buf;
603    num = len >> 3;
604    len &= 7;
605
606    /* Do three interleaved CRCs to realize the throughput of one crc32x
607       instruction per cycle. Each CRC is calculated on Z_BATCH words. The
608       three CRCs are combined into a single CRC after each set of batches. */
609    while (num >= 3 * Z_BATCH) {
610        crc1 = 0;
611        crc2 = 0;
612        for (i = 0; i < Z_BATCH; i++) {
613            val0 = word[i];
614            val1 = word[i + Z_BATCH];
615            val2 = word[i + 2 * Z_BATCH];
616            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
617            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
618            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
619        }
620        word += 3 * Z_BATCH;
621        num -= 3 * Z_BATCH;
622        crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;
623        crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;
624    }
625
626    /* Do one last smaller batch with the remaining words, if there are enough
627       to pay for the combination of CRCs. */
628    last = num / 3;
629    if (last >= Z_BATCH_MIN) {
630        last2 = last << 1;
631        crc1 = 0;
632        crc2 = 0;
633        for (i = 0; i < last; i++) {
634            val0 = word[i];
635            val1 = word[i + last];
636            val2 = word[i + last2];
637            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
638            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
639            __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
640        }
641        word += 3 * last;
642        num -= 3 * last;
643        val = x2nmodp(last, 6);
644        crc = multmodp(val, crc) ^ crc1;
645        crc = multmodp(val, crc) ^ crc2;
646    }
647
648    /* Compute the CRC on any remaining words. */
649    for (i = 0; i < num; i++) {
650        val0 = word[i];
651        __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
652    }
653    word += num;
654
655    /* Complete the CRC on any remaining bytes. */
656    buf = (const unsigned char FAR *)word;
657    while (len) {
658        len--;
659        val = *buf++;
660        __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
661    }
662
663    /* Return the CRC, post-conditioned. */
664    return crc ^ 0xffffffff;
665}
666
667#else
668
669#ifdef W
670
671/*
672  Return the CRC of the W bytes in the word_t data, taking the
673  least-significant byte of the word as the first byte of data, without any pre
674  or post conditioning. This is used to combine the CRCs of each braid.
675 */
676local z_crc_t crc_word(z_word_t data) {
677    int k;
678    for (k = 0; k < W; k++)
679        data = (data >> 8) ^ crc_table[data & 0xff];
680    return (z_crc_t)data;
681}
682
683local z_word_t crc_word_big(z_word_t data) {
684    int k;
685    for (k = 0; k < W; k++)
686        data = (data << 8) ^
687            crc_big_table[(data >> ((W - 1) << 3)) & 0xff];
688    return data;
689}
690
691#endif
692
693/* ========================================================================= */
694unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf,
695                              z_size_t len) {
696    /* Return initial CRC, if requested. */
697    if (buf == Z_NULL) return 0;
698
699#ifdef DYNAMIC_CRC_TABLE
700    once(&made, make_crc_table);
701#endif /* DYNAMIC_CRC_TABLE */
702
703    /* Pre-condition the CRC */
704    crc = (~crc) & 0xffffffff;
705
706#ifdef W
707
708    /* If provided enough bytes, do a braided CRC calculation. */
709    if (len >= N * W + W - 1) {
710        z_size_t blks;
711        z_word_t const *words;
712        unsigned endian;
713        int k;
714
715        /* Compute the CRC up to a z_word_t boundary. */
716        while (len && ((z_size_t)buf & (W - 1)) != 0) {
717            len--;
718            crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
719        }
720
721        /* Compute the CRC on as many N z_word_t blocks as are available. */
722        blks = len / (N * W);
723        len -= blks * N * W;
724        words = (z_word_t const *)buf;
725
726        /* Do endian check at execution time instead of compile time, since ARM
727           processors can change the endianness at execution time. If the
728           compiler knows what the endianness will be, it can optimize out the
729           check and the unused branch. */
730        endian = 1;
731        if (*(unsigned char *)&endian) {
732            /* Little endian. */
733
734            z_crc_t crc0;
735            z_word_t word0;
736#if N > 1
737            z_crc_t crc1;
738            z_word_t word1;
739#if N > 2
740            z_crc_t crc2;
741            z_word_t word2;
742#if N > 3
743            z_crc_t crc3;
744            z_word_t word3;
745#if N > 4
746            z_crc_t crc4;
747            z_word_t word4;
748#if N > 5
749            z_crc_t crc5;
750            z_word_t word5;
751#endif
752#endif
753#endif
754#endif
755#endif
756
757            /* Initialize the CRC for each braid. */
758            crc0 = crc;
759#if N > 1
760            crc1 = 0;
761#if N > 2
762            crc2 = 0;
763#if N > 3
764            crc3 = 0;
765#if N > 4
766            crc4 = 0;
767#if N > 5
768            crc5 = 0;
769#endif
770#endif
771#endif
772#endif
773#endif
774
775            /*
776              Process the first blks-1 blocks, computing the CRCs on each braid
777              independently.
778             */
779            while (--blks) {
780                /* Load the word for each braid into registers. */
781                word0 = crc0 ^ words[0];
782#if N > 1
783                word1 = crc1 ^ words[1];
784#if N > 2
785                word2 = crc2 ^ words[2];
786#if N > 3
787                word3 = crc3 ^ words[3];
788#if N > 4
789                word4 = crc4 ^ words[4];
790#if N > 5
791                word5 = crc5 ^ words[5];
792#endif
793#endif
794#endif
795#endif
796#endif
797                words += N;
798
799                /* Compute and update the CRC for each word. The loop should
800                   get unrolled. */
801                crc0 = crc_braid_table[0][word0 & 0xff];
802#if N > 1
803                crc1 = crc_braid_table[0][word1 & 0xff];
804#if N > 2
805                crc2 = crc_braid_table[0][word2 & 0xff];
806#if N > 3
807                crc3 = crc_braid_table[0][word3 & 0xff];
808#if N > 4
809                crc4 = crc_braid_table[0][word4 & 0xff];
810#if N > 5
811                crc5 = crc_braid_table[0][word5 & 0xff];
812#endif
813#endif
814#endif
815#endif
816#endif
817                for (k = 1; k < W; k++) {
818                    crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff];
819#if N > 1
820                    crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff];
821#if N > 2
822                    crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff];
823#if N > 3
824                    crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff];
825#if N > 4
826                    crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff];
827#if N > 5
828                    crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff];
829#endif
830#endif
831#endif
832#endif
833#endif
834                }
835            }
836
837            /*
838              Process the last block, combining the CRCs of the N braids at the
839              same time.
840             */
841            crc = crc_word(crc0 ^ words[0]);
842#if N > 1
843            crc = crc_word(crc1 ^ words[1] ^ crc);
844#if N > 2
845            crc = crc_word(crc2 ^ words[2] ^ crc);
846#if N > 3
847            crc = crc_word(crc3 ^ words[3] ^ crc);
848#if N > 4
849            crc = crc_word(crc4 ^ words[4] ^ crc);
850#if N > 5
851            crc = crc_word(crc5 ^ words[5] ^ crc);
852#endif
853#endif
854#endif
855#endif
856#endif
857            words += N;
858        }
859        else {
860            /* Big endian. */
861
862            z_word_t crc0, word0, comb;
863#if N > 1
864            z_word_t crc1, word1;
865#if N > 2
866            z_word_t crc2, word2;
867#if N > 3
868            z_word_t crc3, word3;
869#if N > 4
870            z_word_t crc4, word4;
871#if N > 5
872            z_word_t crc5, word5;
873#endif
874#endif
875#endif
876#endif
877#endif
878
879            /* Initialize the CRC for each braid. */
880            crc0 = byte_swap(crc);
881#if N > 1
882            crc1 = 0;
883#if N > 2
884            crc2 = 0;
885#if N > 3
886            crc3 = 0;
887#if N > 4
888            crc4 = 0;
889#if N > 5
890            crc5 = 0;
891#endif
892#endif
893#endif
894#endif
895#endif
896
897            /*
898              Process the first blks-1 blocks, computing the CRCs on each braid
899              independently.
900             */
901            while (--blks) {
902                /* Load the word for each braid into registers. */
903                word0 = crc0 ^ words[0];
904#if N > 1
905                word1 = crc1 ^ words[1];
906#if N > 2
907                word2 = crc2 ^ words[2];
908#if N > 3
909                word3 = crc3 ^ words[3];
910#if N > 4
911                word4 = crc4 ^ words[4];
912#if N > 5
913                word5 = crc5 ^ words[5];
914#endif
915#endif
916#endif
917#endif
918#endif
919                words += N;
920
921                /* Compute and update the CRC for each word. The loop should
922                   get unrolled. */
923                crc0 = crc_braid_big_table[0][word0 & 0xff];
924#if N > 1
925                crc1 = crc_braid_big_table[0][word1 & 0xff];
926#if N > 2
927                crc2 = crc_braid_big_table[0][word2 & 0xff];
928#if N > 3
929                crc3 = crc_braid_big_table[0][word3 & 0xff];
930#if N > 4
931                crc4 = crc_braid_big_table[0][word4 & 0xff];
932#if N > 5
933                crc5 = crc_braid_big_table[0][word5 & 0xff];
934#endif
935#endif
936#endif
937#endif
938#endif
939                for (k = 1; k < W; k++) {
940                    crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff];
941#if N > 1
942                    crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff];
943#if N > 2
944                    crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff];
945#if N > 3
946                    crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff];
947#if N > 4
948                    crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff];
949#if N > 5
950                    crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff];
951#endif
952#endif
953#endif
954#endif
955#endif
956                }
957            }
958
959            /*
960              Process the last block, combining the CRCs of the N braids at the
961              same time.
962             */
963            comb = crc_word_big(crc0 ^ words[0]);
964#if N > 1
965            comb = crc_word_big(crc1 ^ words[1] ^ comb);
966#if N > 2
967            comb = crc_word_big(crc2 ^ words[2] ^ comb);
968#if N > 3
969            comb = crc_word_big(crc3 ^ words[3] ^ comb);
970#if N > 4
971            comb = crc_word_big(crc4 ^ words[4] ^ comb);
972#if N > 5
973            comb = crc_word_big(crc5 ^ words[5] ^ comb);
974#endif
975#endif
976#endif
977#endif
978#endif
979            words += N;
980            crc = byte_swap(comb);
981        }
982
983        /*
984          Update the pointer to the remaining bytes to process.
985         */
986        buf = (unsigned char const *)words;
987    }
988
989#endif /* W */
990
991    /* Complete the computation of the CRC on any remaining bytes. */
992    while (len >= 8) {
993        len -= 8;
994        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
995        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
996        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
997        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
998        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
999        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1000        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1001        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1002    }
1003    while (len) {
1004        len--;
1005        crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
1006    }
1007
1008    /* Return the CRC, post-conditioned. */
1009    return crc ^ 0xffffffff;
1010}
1011
1012#endif
1013
1014/* ========================================================================= */
1015unsigned long ZEXPORT crc32(unsigned long crc, const unsigned char FAR *buf,
1016                            uInt len) {
1017    return crc32_z(crc, buf, len);
1018}
1019
1020/* ========================================================================= */
1021uLong ZEXPORT crc32_combine64(uLong crc1, uLong crc2, z_off64_t len2) {
1022#ifdef DYNAMIC_CRC_TABLE
1023    once(&made, make_crc_table);
1024#endif /* DYNAMIC_CRC_TABLE */
1025    return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff);
1026}
1027
1028/* ========================================================================= */
1029uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2) {
1030    return crc32_combine64(crc1, crc2, (z_off64_t)len2);
1031}
1032
1033/* ========================================================================= */
1034uLong ZEXPORT crc32_combine_gen64(z_off64_t len2) {
1035#ifdef DYNAMIC_CRC_TABLE
1036    once(&made, make_crc_table);
1037#endif /* DYNAMIC_CRC_TABLE */
1038    return x2nmodp(len2, 3);
1039}
1040
1041/* ========================================================================= */
1042uLong ZEXPORT crc32_combine_gen(z_off_t len2) {
1043    return crc32_combine_gen64((z_off64_t)len2);
1044}
1045
1046/* ========================================================================= */
1047uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op) {
1048    return multmodp(op, crc1) ^ (crc2 & 0xffffffff);
1049}
1050