1/*
2 * Copyright 2005-2021 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License 2.0 (the "License").  You may not use
5 * this file except in compliance with the License.  You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10/**
11 * The Whirlpool hashing function.
12 *
13 * See
14 *      P.S.L.M. Barreto, V. Rijmen,
15 *      ``The Whirlpool hashing function,''
16 *      NESSIE submission, 2000 (tweaked version, 2001),
17 *      <https://www.cosic.esat.kuleuven.ac.be/nessie/workshop/submissions/whirlpool.zip>
18 *
19 * Based on "@version 3.0 (2003.03.12)" by Paulo S.L.M. Barreto and
20 * Vincent Rijmen. Lookup "reference implementations" on
21 * <http://planeta.terra.com.br/informatica/paulobarreto/>
22 *
23 * =============================================================================
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
26 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
27 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
29 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
32 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
33 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
34 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
35 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 */
38
39/*
40 * Whirlpool low level APIs are deprecated for public use, but still ok for
41 * internal use.
42 */
43#include "internal/deprecated.h"
44
45#include "internal/cryptlib.h"
46#include "wp_local.h"
47#include <string.h>
48
typedef unsigned char u8;
/*
 * 64-bit unsigned type.  MSVC spells it "unsigned __int64"; GCC-based
 * toolchains (including MinGW) use the standard types.  Note that the
 * MinGW predefined macro is __MINGW32__ (with trailing underscores) --
 * plain __MINGW32 is never predefined by any compiler, so testing it
 * would wrongly send MinGW down the MSVC __int64 branch.
 * __arch64__ is predefined by GCC on 64-bit SPARC, where long is 64 bits.
 */
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
typedef unsigned __int64 u64;
#elif defined(__arch64__)
typedef unsigned long u64;
#else
typedef unsigned long long u64;
#endif
57
/* Number of rounds of the Whirlpool compression function. */
#define ROUNDS  10

/*
 * STRICT_ALIGNMENT means 64-bit loads must only be issued at 8-byte
 * aligned addresses.  It is switched off on x86/x86_64 (unless PEDANTIC
 * requests strictly portable code), because those CPUs handle misaligned
 * loads in hardware; that choice selects the 4KB table layout below.
 */
#define STRICT_ALIGNMENT
#if !defined(PEDANTIC) && (defined(__i386) || defined(__i386__) || \
                           defined(__x86_64) || defined(__x86_64__) || \
                           defined(_M_IX86) || defined(_M_AMD64) || \
                           defined(_M_X64))
/*
 * Well, formally there're couple of other architectures, which permit
 * unaligned loads, specifically those not crossing cache lines, IA-64 and
 * PowerPC...
 */
# undef STRICT_ALIGNMENT
#endif

/*
 * u64_a1: a u64 whose accesses the compiler must assume may be only
 * byte-aligned.  With GCC this is expressed via __aligned__(1) so that
 * dereferencing a misaligned pointer is well-defined in GCC's dialect;
 * the plain-u64 fallback is only ever used when STRICT_ALIGNMENT is on
 * (this whole block is skipped) or on non-GCC compilers.
 */
#ifndef STRICT_ALIGNMENT
# ifdef __GNUC__
typedef u64 u64_a1 __attribute((__aligned__(1)));
# else
typedef u64 u64_a1;
# endif
#endif

/* u64_aX: same idea as u64_a1, used for loads from the caller's input. */
#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
typedef u64 u64_aX __attribute((__aligned__(1)));
#else
typedef u64 u64_aX;
#endif

/*
 * SMALL_REGISTER_BANK: i386 has too few general-purpose registers to
 * keep sixteen 64-bit temporaries live, so the round below is written
 * against K.q[]/S.q[] memory instead.  When the MMX assembly module is
 * built (WHIRLPOOL_ASM), GO_FOR_MMX dispatches to whirlpool_block_mmx
 * at run time if OPENSSL_ia32cap_P[0] has bit 23 set (presumably the
 * CPUID MMX feature bit, given the target routine's name -- the macro
 * falls through to the C code otherwise).
 */
#undef SMALL_REGISTER_BANK
#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
# define SMALL_REGISTER_BANK
# if defined(WHIRLPOOL_ASM)
#  ifndef OPENSSL_SMALL_FOOTPRINT
/*
 * it appears that for elder non-MMX
 * CPUs this is actually faster!
 */
#   define OPENSSL_SMALL_FOOTPRINT
#  endif
#  define GO_FOR_MMX(ctx,inp,num)     do {                    \
        void whirlpool_block_mmx(void *,const void *,size_t);   \
        if (!(OPENSSL_ia32cap_P[0] & (1<<23)))  break;          \
        whirlpool_block_mmx(ctx->H.c,inp,num);  return;         \
                                        } while (0)
# endif
#endif
105
/*
 * ROTATE(a,n): 64-bit rotate used to derive the seven rotated table
 * columns from a single stored one (2KB-table mode).  Fast paths:
 * the _rotl64 intrinsic on Win64 MSVC, inline-asm rol/ror on x86_64
 * GCC, and the shrp funnel-shift on IA-64.  If no path applies the
 * macro stays undefined and the code falls back to a pre-rotated
 * table layout instead (see the size discussion below).
 */
#undef ROTATE
#ifndef PEDANTIC
# if defined(_MSC_VER)
#  if defined(_WIN64)            /* applies to both IA-64 and AMD64 */
#   include <stdlib.h>
#   pragma intrinsic(_rotl64)
#   define ROTATE(a,n) _rotl64((a),n)
#  endif
# elif defined(__GNUC__) && __GNUC__>=2
#  if defined(__x86_64) || defined(__x86_64__)
#   if defined(L_ENDIAN)
#    define ROTATE(a,n)       ({ u64 ret; asm ("rolq %1,%0"   \
                                   : "=r"(ret) : "J"(n),"0"(a) : "cc"); ret; })
#   elif defined(B_ENDIAN)
       /*
        * Most will argue that x86_64 is always little-endian. Well, yes, but
        * then we have stratus.com who has modified gcc to "emulate"
        * big-endian on x86. Is there evidence that they [or somebody else]
        * won't do same for x86_64? Naturally no. And this line is waiting
        * ready for that brave soul:-)
        */
#    define ROTATE(a,n)       ({ u64 ret; asm ("rorq %1,%0"   \
                                   : "=r"(ret) : "J"(n),"0"(a) : "cc"); ret; })
#   endif
#  elif defined(__ia64) || defined(__ia64__)
#   if defined(L_ENDIAN)
#    define ROTATE(a,n)       ({ u64 ret; asm ("shrp %0=%1,%1,%2"     \
                                   : "=r"(ret) : "r"(a),"M"(64-(n))); ret; })
#   elif defined(B_ENDIAN)
#    define ROTATE(a,n)       ({ u64 ret; asm ("shrp %0=%1,%1,%2"     \
                                   : "=r"(ret) : "r"(a),"M"(n)); ret; })
#   endif
#  endif
# endif
#endif
141
#if defined(OPENSSL_SMALL_FOOTPRINT)
# if !defined(ROTATE)
/*
 * Generic C fallbacks.  The rotate counts passed by the Cn macros are
 * the literals 8..56, so 0 < n < 64 and both shifts are well-defined.
 * The shift-count expression is fully parenthesized as (64-(n)) so
 * that a compound expression supplied as n cannot change the grouping
 * (the previous 64-n form would mis-expand for e.g. n == a+b).
 */
#  if defined(L_ENDIAN)         /* little-endians have to rotate left */
#   define ROTATE(i,n)       ((i)<<(n) ^ (i)>>(64-(n)))
#  elif defined(B_ENDIAN)       /* big-endians have to rotate right */
#   define ROTATE(i,n)       ((i)>>(n) ^ (i)<<(64-(n)))
#  endif
# endif
# if defined(ROTATE) && !defined(STRICT_ALIGNMENT)
#  define STRICT_ALIGNMENT      /* ensure smallest table size */
# endif
#endif
154
155/*
156 * Table size depends on STRICT_ALIGNMENT and whether or not endian-
157 * specific ROTATE macro is defined. If STRICT_ALIGNMENT is not
158 * defined, which is normally the case on x86[_64] CPUs, the table is
159 * 4KB large unconditionally. Otherwise if ROTATE is defined, the
160 * table is 2KB large, and otherwise - 16KB. 2KB table requires a
161 * whole bunch of additional rotations, but I'm willing to "trade,"
162 * because 16KB table certainly trashes L1 cache. I wish all CPUs
163 * could handle unaligned load as 4KB table doesn't trash the cache,
164 * nor does it require additional rotations.
165 */
166/*
167 * Note that every Cn macro expands as two loads: one byte load and
168 * one quadword load. One can argue that many single-byte loads
169 * is too excessive, as one could load a quadword and "milk" it for
170 * eight 8-bit values instead. Well, yes, but in order to do so *and*
171 * avoid excessive loads you have to accommodate a handful of 64-bit
172 * values in the register bank and issue a bunch of shifts and mask.
173 * It's a tradeoff: loads vs. shift and mask in big register bank[!].
174 * On most CPUs eight single-byte loads are faster and I let other
175 * ones to depend on smart compiler to fold byte loads if beneficial.
176 * Hand-coded assembler would be another alternative:-)
177 */
/*
 * Cn(K,i) fetches the column-n contribution selected by state byte
 * K.c[i*8+n].  Three table layouts, chosen by alignment/rotate support
 * (see the size discussion above):
 *  - N==1 (2KB):  one u64 per S-box entry, rotated at run time;
 *  - N==8 (16KB): eight pre-rotated u64s per entry, indexed directly;
 *  - N==2 (4KB):  each u64 stored twice back to back, so that a
 *    (possibly misaligned) u64 load displaced by a few bytes yields
 *    the value pre-rotated by the required multiple of 8 bits.
 *    C0 reads at offset 0 and is always aligned, hence plain u64;
 *    the others use u64_a1 to mark the access as unaligned.
 */
#ifdef STRICT_ALIGNMENT
# if defined(ROTATE)
#  define N   1
#  define LL(c0,c1,c2,c3,c4,c5,c6,c7) c0,c1,c2,c3,c4,c5,c6,c7
#  define C0(K,i)     (Cx.q[K.c[(i)*8+0]])
#  define C1(K,i)     ROTATE(Cx.q[K.c[(i)*8+1]],8)
#  define C2(K,i)     ROTATE(Cx.q[K.c[(i)*8+2]],16)
#  define C3(K,i)     ROTATE(Cx.q[K.c[(i)*8+3]],24)
#  define C4(K,i)     ROTATE(Cx.q[K.c[(i)*8+4]],32)
#  define C5(K,i)     ROTATE(Cx.q[K.c[(i)*8+5]],40)
#  define C6(K,i)     ROTATE(Cx.q[K.c[(i)*8+6]],48)
#  define C7(K,i)     ROTATE(Cx.q[K.c[(i)*8+7]],56)
# else
#  define N   8
#  define LL(c0,c1,c2,c3,c4,c5,c6,c7) c0,c1,c2,c3,c4,c5,c6,c7, \
                                        c7,c0,c1,c2,c3,c4,c5,c6, \
                                        c6,c7,c0,c1,c2,c3,c4,c5, \
                                        c5,c6,c7,c0,c1,c2,c3,c4, \
                                        c4,c5,c6,c7,c0,c1,c2,c3, \
                                        c3,c4,c5,c6,c7,c0,c1,c2, \
                                        c2,c3,c4,c5,c6,c7,c0,c1, \
                                        c1,c2,c3,c4,c5,c6,c7,c0
#  define C0(K,i)     (Cx.q[0+8*K.c[(i)*8+0]])
#  define C1(K,i)     (Cx.q[1+8*K.c[(i)*8+1]])
#  define C2(K,i)     (Cx.q[2+8*K.c[(i)*8+2]])
#  define C3(K,i)     (Cx.q[3+8*K.c[(i)*8+3]])
#  define C4(K,i)     (Cx.q[4+8*K.c[(i)*8+4]])
#  define C5(K,i)     (Cx.q[5+8*K.c[(i)*8+5]])
#  define C6(K,i)     (Cx.q[6+8*K.c[(i)*8+6]])
#  define C7(K,i)     (Cx.q[7+8*K.c[(i)*8+7]])
# endif
#else
# define N     2
# define LL(c0,c1,c2,c3,c4,c5,c6,c7)   c0,c1,c2,c3,c4,c5,c6,c7, \
                                        c0,c1,c2,c3,c4,c5,c6,c7
# define C0(K,i)       (((u64*)(Cx.c+0))[2*K.c[(i)*8+0]])
# define C1(K,i)       (((u64_a1*)(Cx.c+7))[2*K.c[(i)*8+1]])
# define C2(K,i)       (((u64_a1*)(Cx.c+6))[2*K.c[(i)*8+2]])
# define C3(K,i)       (((u64_a1*)(Cx.c+5))[2*K.c[(i)*8+3]])
# define C4(K,i)       (((u64_a1*)(Cx.c+4))[2*K.c[(i)*8+4]])
# define C5(K,i)       (((u64_a1*)(Cx.c+3))[2*K.c[(i)*8+5]])
# define C6(K,i)       (((u64_a1*)(Cx.c+2))[2*K.c[(i)*8+6]])
# define C7(K,i)       (((u64_a1*)(Cx.c+1))[2*K.c[(i)*8+7]])
#endif
222
/*
 * Cx: the combined Whirlpool lookup table.  The first 256*N u64 slots
 * hold the table entries (replicated or pre-rotated per entry according
 * to the LL layout selected above); the trailing ROUNDS u64s are the
 * round constants, exposed through the RC macro defined mid-initializer
 * below.  The initializer lists individual bytes (member c) so the same
 * source works on either endianness.
 */
static const
    union {
    u8 c[(256 * N + ROUNDS) * sizeof(u64)];
    u64 q[(256 * N + ROUNDS)];
} Cx = {
        {
            /* Note endian-neutral representation:-) */
            LL(0x18, 0x18, 0x60, 0x18, 0xc0, 0x78, 0x30, 0xd8),
            LL(0x23, 0x23, 0x8c, 0x23, 0x05, 0xaf, 0x46, 0x26),
            LL(0xc6, 0xc6, 0x3f, 0xc6, 0x7e, 0xf9, 0x91, 0xb8),
            LL(0xe8, 0xe8, 0x87, 0xe8, 0x13, 0x6f, 0xcd, 0xfb),
            LL(0x87, 0x87, 0x26, 0x87, 0x4c, 0xa1, 0x13, 0xcb),
            LL(0xb8, 0xb8, 0xda, 0xb8, 0xa9, 0x62, 0x6d, 0x11),
            LL(0x01, 0x01, 0x04, 0x01, 0x08, 0x05, 0x02, 0x09),
            LL(0x4f, 0x4f, 0x21, 0x4f, 0x42, 0x6e, 0x9e, 0x0d),
            LL(0x36, 0x36, 0xd8, 0x36, 0xad, 0xee, 0x6c, 0x9b),
            LL(0xa6, 0xa6, 0xa2, 0xa6, 0x59, 0x04, 0x51, 0xff),
            LL(0xd2, 0xd2, 0x6f, 0xd2, 0xde, 0xbd, 0xb9, 0x0c),
            LL(0xf5, 0xf5, 0xf3, 0xf5, 0xfb, 0x06, 0xf7, 0x0e),
            LL(0x79, 0x79, 0xf9, 0x79, 0xef, 0x80, 0xf2, 0x96),
            LL(0x6f, 0x6f, 0xa1, 0x6f, 0x5f, 0xce, 0xde, 0x30),
            LL(0x91, 0x91, 0x7e, 0x91, 0xfc, 0xef, 0x3f, 0x6d),
            LL(0x52, 0x52, 0x55, 0x52, 0xaa, 0x07, 0xa4, 0xf8),
            LL(0x60, 0x60, 0x9d, 0x60, 0x27, 0xfd, 0xc0, 0x47),
            LL(0xbc, 0xbc, 0xca, 0xbc, 0x89, 0x76, 0x65, 0x35),
            LL(0x9b, 0x9b, 0x56, 0x9b, 0xac, 0xcd, 0x2b, 0x37),
            LL(0x8e, 0x8e, 0x02, 0x8e, 0x04, 0x8c, 0x01, 0x8a),
            LL(0xa3, 0xa3, 0xb6, 0xa3, 0x71, 0x15, 0x5b, 0xd2),
            LL(0x0c, 0x0c, 0x30, 0x0c, 0x60, 0x3c, 0x18, 0x6c),
            LL(0x7b, 0x7b, 0xf1, 0x7b, 0xff, 0x8a, 0xf6, 0x84),
            LL(0x35, 0x35, 0xd4, 0x35, 0xb5, 0xe1, 0x6a, 0x80),
            LL(0x1d, 0x1d, 0x74, 0x1d, 0xe8, 0x69, 0x3a, 0xf5),
            LL(0xe0, 0xe0, 0xa7, 0xe0, 0x53, 0x47, 0xdd, 0xb3),
            LL(0xd7, 0xd7, 0x7b, 0xd7, 0xf6, 0xac, 0xb3, 0x21),
            LL(0xc2, 0xc2, 0x2f, 0xc2, 0x5e, 0xed, 0x99, 0x9c),
            LL(0x2e, 0x2e, 0xb8, 0x2e, 0x6d, 0x96, 0x5c, 0x43),
            LL(0x4b, 0x4b, 0x31, 0x4b, 0x62, 0x7a, 0x96, 0x29),
            LL(0xfe, 0xfe, 0xdf, 0xfe, 0xa3, 0x21, 0xe1, 0x5d),
            LL(0x57, 0x57, 0x41, 0x57, 0x82, 0x16, 0xae, 0xd5),
            LL(0x15, 0x15, 0x54, 0x15, 0xa8, 0x41, 0x2a, 0xbd),
            LL(0x77, 0x77, 0xc1, 0x77, 0x9f, 0xb6, 0xee, 0xe8),
            LL(0x37, 0x37, 0xdc, 0x37, 0xa5, 0xeb, 0x6e, 0x92),
            LL(0xe5, 0xe5, 0xb3, 0xe5, 0x7b, 0x56, 0xd7, 0x9e),
            LL(0x9f, 0x9f, 0x46, 0x9f, 0x8c, 0xd9, 0x23, 0x13),
            LL(0xf0, 0xf0, 0xe7, 0xf0, 0xd3, 0x17, 0xfd, 0x23),
            LL(0x4a, 0x4a, 0x35, 0x4a, 0x6a, 0x7f, 0x94, 0x20),
            LL(0xda, 0xda, 0x4f, 0xda, 0x9e, 0x95, 0xa9, 0x44),
            LL(0x58, 0x58, 0x7d, 0x58, 0xfa, 0x25, 0xb0, 0xa2),
            LL(0xc9, 0xc9, 0x03, 0xc9, 0x06, 0xca, 0x8f, 0xcf),
            LL(0x29, 0x29, 0xa4, 0x29, 0x55, 0x8d, 0x52, 0x7c),
            LL(0x0a, 0x0a, 0x28, 0x0a, 0x50, 0x22, 0x14, 0x5a),
            LL(0xb1, 0xb1, 0xfe, 0xb1, 0xe1, 0x4f, 0x7f, 0x50),
            LL(0xa0, 0xa0, 0xba, 0xa0, 0x69, 0x1a, 0x5d, 0xc9),
            LL(0x6b, 0x6b, 0xb1, 0x6b, 0x7f, 0xda, 0xd6, 0x14),
            LL(0x85, 0x85, 0x2e, 0x85, 0x5c, 0xab, 0x17, 0xd9),
            LL(0xbd, 0xbd, 0xce, 0xbd, 0x81, 0x73, 0x67, 0x3c),
            LL(0x5d, 0x5d, 0x69, 0x5d, 0xd2, 0x34, 0xba, 0x8f),
            LL(0x10, 0x10, 0x40, 0x10, 0x80, 0x50, 0x20, 0x90),
            LL(0xf4, 0xf4, 0xf7, 0xf4, 0xf3, 0x03, 0xf5, 0x07),
            LL(0xcb, 0xcb, 0x0b, 0xcb, 0x16, 0xc0, 0x8b, 0xdd),
            LL(0x3e, 0x3e, 0xf8, 0x3e, 0xed, 0xc6, 0x7c, 0xd3),
            LL(0x05, 0x05, 0x14, 0x05, 0x28, 0x11, 0x0a, 0x2d),
            LL(0x67, 0x67, 0x81, 0x67, 0x1f, 0xe6, 0xce, 0x78),
            LL(0xe4, 0xe4, 0xb7, 0xe4, 0x73, 0x53, 0xd5, 0x97),
            LL(0x27, 0x27, 0x9c, 0x27, 0x25, 0xbb, 0x4e, 0x02),
            LL(0x41, 0x41, 0x19, 0x41, 0x32, 0x58, 0x82, 0x73),
            LL(0x8b, 0x8b, 0x16, 0x8b, 0x2c, 0x9d, 0x0b, 0xa7),
            LL(0xa7, 0xa7, 0xa6, 0xa7, 0x51, 0x01, 0x53, 0xf6),
            LL(0x7d, 0x7d, 0xe9, 0x7d, 0xcf, 0x94, 0xfa, 0xb2),
            LL(0x95, 0x95, 0x6e, 0x95, 0xdc, 0xfb, 0x37, 0x49),
            LL(0xd8, 0xd8, 0x47, 0xd8, 0x8e, 0x9f, 0xad, 0x56),
            LL(0xfb, 0xfb, 0xcb, 0xfb, 0x8b, 0x30, 0xeb, 0x70),
            LL(0xee, 0xee, 0x9f, 0xee, 0x23, 0x71, 0xc1, 0xcd),
            LL(0x7c, 0x7c, 0xed, 0x7c, 0xc7, 0x91, 0xf8, 0xbb),
            LL(0x66, 0x66, 0x85, 0x66, 0x17, 0xe3, 0xcc, 0x71),
            LL(0xdd, 0xdd, 0x53, 0xdd, 0xa6, 0x8e, 0xa7, 0x7b),
            LL(0x17, 0x17, 0x5c, 0x17, 0xb8, 0x4b, 0x2e, 0xaf),
            LL(0x47, 0x47, 0x01, 0x47, 0x02, 0x46, 0x8e, 0x45),
            LL(0x9e, 0x9e, 0x42, 0x9e, 0x84, 0xdc, 0x21, 0x1a),
            LL(0xca, 0xca, 0x0f, 0xca, 0x1e, 0xc5, 0x89, 0xd4),
            LL(0x2d, 0x2d, 0xb4, 0x2d, 0x75, 0x99, 0x5a, 0x58),
            LL(0xbf, 0xbf, 0xc6, 0xbf, 0x91, 0x79, 0x63, 0x2e),
            LL(0x07, 0x07, 0x1c, 0x07, 0x38, 0x1b, 0x0e, 0x3f),
            LL(0xad, 0xad, 0x8e, 0xad, 0x01, 0x23, 0x47, 0xac),
            LL(0x5a, 0x5a, 0x75, 0x5a, 0xea, 0x2f, 0xb4, 0xb0),
            LL(0x83, 0x83, 0x36, 0x83, 0x6c, 0xb5, 0x1b, 0xef),
            LL(0x33, 0x33, 0xcc, 0x33, 0x85, 0xff, 0x66, 0xb6),
            LL(0x63, 0x63, 0x91, 0x63, 0x3f, 0xf2, 0xc6, 0x5c),
            LL(0x02, 0x02, 0x08, 0x02, 0x10, 0x0a, 0x04, 0x12),
            LL(0xaa, 0xaa, 0x92, 0xaa, 0x39, 0x38, 0x49, 0x93),
            LL(0x71, 0x71, 0xd9, 0x71, 0xaf, 0xa8, 0xe2, 0xde),
            LL(0xc8, 0xc8, 0x07, 0xc8, 0x0e, 0xcf, 0x8d, 0xc6),
            LL(0x19, 0x19, 0x64, 0x19, 0xc8, 0x7d, 0x32, 0xd1),
            LL(0x49, 0x49, 0x39, 0x49, 0x72, 0x70, 0x92, 0x3b),
            LL(0xd9, 0xd9, 0x43, 0xd9, 0x86, 0x9a, 0xaf, 0x5f),
            LL(0xf2, 0xf2, 0xef, 0xf2, 0xc3, 0x1d, 0xf9, 0x31),
            LL(0xe3, 0xe3, 0xab, 0xe3, 0x4b, 0x48, 0xdb, 0xa8),
            LL(0x5b, 0x5b, 0x71, 0x5b, 0xe2, 0x2a, 0xb6, 0xb9),
            LL(0x88, 0x88, 0x1a, 0x88, 0x34, 0x92, 0x0d, 0xbc),
            LL(0x9a, 0x9a, 0x52, 0x9a, 0xa4, 0xc8, 0x29, 0x3e),
            LL(0x26, 0x26, 0x98, 0x26, 0x2d, 0xbe, 0x4c, 0x0b),
            LL(0x32, 0x32, 0xc8, 0x32, 0x8d, 0xfa, 0x64, 0xbf),
            LL(0xb0, 0xb0, 0xfa, 0xb0, 0xe9, 0x4a, 0x7d, 0x59),
            LL(0xe9, 0xe9, 0x83, 0xe9, 0x1b, 0x6a, 0xcf, 0xf2),
            LL(0x0f, 0x0f, 0x3c, 0x0f, 0x78, 0x33, 0x1e, 0x77),
            LL(0xd5, 0xd5, 0x73, 0xd5, 0xe6, 0xa6, 0xb7, 0x33),
            LL(0x80, 0x80, 0x3a, 0x80, 0x74, 0xba, 0x1d, 0xf4),
            LL(0xbe, 0xbe, 0xc2, 0xbe, 0x99, 0x7c, 0x61, 0x27),
            LL(0xcd, 0xcd, 0x13, 0xcd, 0x26, 0xde, 0x87, 0xeb),
            LL(0x34, 0x34, 0xd0, 0x34, 0xbd, 0xe4, 0x68, 0x89),
            LL(0x48, 0x48, 0x3d, 0x48, 0x7a, 0x75, 0x90, 0x32),
            LL(0xff, 0xff, 0xdb, 0xff, 0xab, 0x24, 0xe3, 0x54),
            LL(0x7a, 0x7a, 0xf5, 0x7a, 0xf7, 0x8f, 0xf4, 0x8d),
            LL(0x90, 0x90, 0x7a, 0x90, 0xf4, 0xea, 0x3d, 0x64),
            LL(0x5f, 0x5f, 0x61, 0x5f, 0xc2, 0x3e, 0xbe, 0x9d),
            LL(0x20, 0x20, 0x80, 0x20, 0x1d, 0xa0, 0x40, 0x3d),
            LL(0x68, 0x68, 0xbd, 0x68, 0x67, 0xd5, 0xd0, 0x0f),
            LL(0x1a, 0x1a, 0x68, 0x1a, 0xd0, 0x72, 0x34, 0xca),
            LL(0xae, 0xae, 0x82, 0xae, 0x19, 0x2c, 0x41, 0xb7),
            LL(0xb4, 0xb4, 0xea, 0xb4, 0xc9, 0x5e, 0x75, 0x7d),
            LL(0x54, 0x54, 0x4d, 0x54, 0x9a, 0x19, 0xa8, 0xce),
            LL(0x93, 0x93, 0x76, 0x93, 0xec, 0xe5, 0x3b, 0x7f),
            LL(0x22, 0x22, 0x88, 0x22, 0x0d, 0xaa, 0x44, 0x2f),
            LL(0x64, 0x64, 0x8d, 0x64, 0x07, 0xe9, 0xc8, 0x63),
            LL(0xf1, 0xf1, 0xe3, 0xf1, 0xdb, 0x12, 0xff, 0x2a),
            LL(0x73, 0x73, 0xd1, 0x73, 0xbf, 0xa2, 0xe6, 0xcc),
            LL(0x12, 0x12, 0x48, 0x12, 0x90, 0x5a, 0x24, 0x82),
            LL(0x40, 0x40, 0x1d, 0x40, 0x3a, 0x5d, 0x80, 0x7a),
            LL(0x08, 0x08, 0x20, 0x08, 0x40, 0x28, 0x10, 0x48),
            LL(0xc3, 0xc3, 0x2b, 0xc3, 0x56, 0xe8, 0x9b, 0x95),
            LL(0xec, 0xec, 0x97, 0xec, 0x33, 0x7b, 0xc5, 0xdf),
            LL(0xdb, 0xdb, 0x4b, 0xdb, 0x96, 0x90, 0xab, 0x4d),
            LL(0xa1, 0xa1, 0xbe, 0xa1, 0x61, 0x1f, 0x5f, 0xc0),
            LL(0x8d, 0x8d, 0x0e, 0x8d, 0x1c, 0x83, 0x07, 0x91),
            LL(0x3d, 0x3d, 0xf4, 0x3d, 0xf5, 0xc9, 0x7a, 0xc8),
            LL(0x97, 0x97, 0x66, 0x97, 0xcc, 0xf1, 0x33, 0x5b),
            LL(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
            LL(0xcf, 0xcf, 0x1b, 0xcf, 0x36, 0xd4, 0x83, 0xf9),
            LL(0x2b, 0x2b, 0xac, 0x2b, 0x45, 0x87, 0x56, 0x6e),
            LL(0x76, 0x76, 0xc5, 0x76, 0x97, 0xb3, 0xec, 0xe1),
            LL(0x82, 0x82, 0x32, 0x82, 0x64, 0xb0, 0x19, 0xe6),
            LL(0xd6, 0xd6, 0x7f, 0xd6, 0xfe, 0xa9, 0xb1, 0x28),
            LL(0x1b, 0x1b, 0x6c, 0x1b, 0xd8, 0x77, 0x36, 0xc3),
            LL(0xb5, 0xb5, 0xee, 0xb5, 0xc1, 0x5b, 0x77, 0x74),
            LL(0xaf, 0xaf, 0x86, 0xaf, 0x11, 0x29, 0x43, 0xbe),
            LL(0x6a, 0x6a, 0xb5, 0x6a, 0x77, 0xdf, 0xd4, 0x1d),
            LL(0x50, 0x50, 0x5d, 0x50, 0xba, 0x0d, 0xa0, 0xea),
            LL(0x45, 0x45, 0x09, 0x45, 0x12, 0x4c, 0x8a, 0x57),
            LL(0xf3, 0xf3, 0xeb, 0xf3, 0xcb, 0x18, 0xfb, 0x38),
            LL(0x30, 0x30, 0xc0, 0x30, 0x9d, 0xf0, 0x60, 0xad),
            LL(0xef, 0xef, 0x9b, 0xef, 0x2b, 0x74, 0xc3, 0xc4),
            LL(0x3f, 0x3f, 0xfc, 0x3f, 0xe5, 0xc3, 0x7e, 0xda),
            LL(0x55, 0x55, 0x49, 0x55, 0x92, 0x1c, 0xaa, 0xc7),
            LL(0xa2, 0xa2, 0xb2, 0xa2, 0x79, 0x10, 0x59, 0xdb),
            LL(0xea, 0xea, 0x8f, 0xea, 0x03, 0x65, 0xc9, 0xe9),
            LL(0x65, 0x65, 0x89, 0x65, 0x0f, 0xec, 0xca, 0x6a),
            LL(0xba, 0xba, 0xd2, 0xba, 0xb9, 0x68, 0x69, 0x03),
            LL(0x2f, 0x2f, 0xbc, 0x2f, 0x65, 0x93, 0x5e, 0x4a),
            LL(0xc0, 0xc0, 0x27, 0xc0, 0x4e, 0xe7, 0x9d, 0x8e),
            LL(0xde, 0xde, 0x5f, 0xde, 0xbe, 0x81, 0xa1, 0x60),
            LL(0x1c, 0x1c, 0x70, 0x1c, 0xe0, 0x6c, 0x38, 0xfc),
            LL(0xfd, 0xfd, 0xd3, 0xfd, 0xbb, 0x2e, 0xe7, 0x46),
            LL(0x4d, 0x4d, 0x29, 0x4d, 0x52, 0x64, 0x9a, 0x1f),
            LL(0x92, 0x92, 0x72, 0x92, 0xe4, 0xe0, 0x39, 0x76),
            LL(0x75, 0x75, 0xc9, 0x75, 0x8f, 0xbc, 0xea, 0xfa),
            LL(0x06, 0x06, 0x18, 0x06, 0x30, 0x1e, 0x0c, 0x36),
            LL(0x8a, 0x8a, 0x12, 0x8a, 0x24, 0x98, 0x09, 0xae),
            LL(0xb2, 0xb2, 0xf2, 0xb2, 0xf9, 0x40, 0x79, 0x4b),
            LL(0xe6, 0xe6, 0xbf, 0xe6, 0x63, 0x59, 0xd1, 0x85),
            LL(0x0e, 0x0e, 0x38, 0x0e, 0x70, 0x36, 0x1c, 0x7e),
            LL(0x1f, 0x1f, 0x7c, 0x1f, 0xf8, 0x63, 0x3e, 0xe7),
            LL(0x62, 0x62, 0x95, 0x62, 0x37, 0xf7, 0xc4, 0x55),
            LL(0xd4, 0xd4, 0x77, 0xd4, 0xee, 0xa3, 0xb5, 0x3a),
            LL(0xa8, 0xa8, 0x9a, 0xa8, 0x29, 0x32, 0x4d, 0x81),
            LL(0x96, 0x96, 0x62, 0x96, 0xc4, 0xf4, 0x31, 0x52),
            LL(0xf9, 0xf9, 0xc3, 0xf9, 0x9b, 0x3a, 0xef, 0x62),
            LL(0xc5, 0xc5, 0x33, 0xc5, 0x66, 0xf6, 0x97, 0xa3),
            LL(0x25, 0x25, 0x94, 0x25, 0x35, 0xb1, 0x4a, 0x10),
            LL(0x59, 0x59, 0x79, 0x59, 0xf2, 0x20, 0xb2, 0xab),
            LL(0x84, 0x84, 0x2a, 0x84, 0x54, 0xae, 0x15, 0xd0),
            LL(0x72, 0x72, 0xd5, 0x72, 0xb7, 0xa7, 0xe4, 0xc5),
            LL(0x39, 0x39, 0xe4, 0x39, 0xd5, 0xdd, 0x72, 0xec),
            LL(0x4c, 0x4c, 0x2d, 0x4c, 0x5a, 0x61, 0x98, 0x16),
            LL(0x5e, 0x5e, 0x65, 0x5e, 0xca, 0x3b, 0xbc, 0x94),
            LL(0x78, 0x78, 0xfd, 0x78, 0xe7, 0x85, 0xf0, 0x9f),
            LL(0x38, 0x38, 0xe0, 0x38, 0xdd, 0xd8, 0x70, 0xe5),
            LL(0x8c, 0x8c, 0x0a, 0x8c, 0x14, 0x86, 0x05, 0x98),
            LL(0xd1, 0xd1, 0x63, 0xd1, 0xc6, 0xb2, 0xbf, 0x17),
            LL(0xa5, 0xa5, 0xae, 0xa5, 0x41, 0x0b, 0x57, 0xe4),
            LL(0xe2, 0xe2, 0xaf, 0xe2, 0x43, 0x4d, 0xd9, 0xa1),
            LL(0x61, 0x61, 0x99, 0x61, 0x2f, 0xf8, 0xc2, 0x4e),
            LL(0xb3, 0xb3, 0xf6, 0xb3, 0xf1, 0x45, 0x7b, 0x42),
            LL(0x21, 0x21, 0x84, 0x21, 0x15, 0xa5, 0x42, 0x34),
            LL(0x9c, 0x9c, 0x4a, 0x9c, 0x94, 0xd6, 0x25, 0x08),
            LL(0x1e, 0x1e, 0x78, 0x1e, 0xf0, 0x66, 0x3c, 0xee),
            LL(0x43, 0x43, 0x11, 0x43, 0x22, 0x52, 0x86, 0x61),
            LL(0xc7, 0xc7, 0x3b, 0xc7, 0x76, 0xfc, 0x93, 0xb1),
            LL(0xfc, 0xfc, 0xd7, 0xfc, 0xb3, 0x2b, 0xe5, 0x4f),
            LL(0x04, 0x04, 0x10, 0x04, 0x20, 0x14, 0x08, 0x24),
            LL(0x51, 0x51, 0x59, 0x51, 0xb2, 0x08, 0xa2, 0xe3),
            LL(0x99, 0x99, 0x5e, 0x99, 0xbc, 0xc7, 0x2f, 0x25),
            LL(0x6d, 0x6d, 0xa9, 0x6d, 0x4f, 0xc4, 0xda, 0x22),
            LL(0x0d, 0x0d, 0x34, 0x0d, 0x68, 0x39, 0x1a, 0x65),
            LL(0xfa, 0xfa, 0xcf, 0xfa, 0x83, 0x35, 0xe9, 0x79),
            LL(0xdf, 0xdf, 0x5b, 0xdf, 0xb6, 0x84, 0xa3, 0x69),
            LL(0x7e, 0x7e, 0xe5, 0x7e, 0xd7, 0x9b, 0xfc, 0xa9),
            LL(0x24, 0x24, 0x90, 0x24, 0x3d, 0xb4, 0x48, 0x19),
            LL(0x3b, 0x3b, 0xec, 0x3b, 0xc5, 0xd7, 0x76, 0xfe),
            LL(0xab, 0xab, 0x96, 0xab, 0x31, 0x3d, 0x4b, 0x9a),
            LL(0xce, 0xce, 0x1f, 0xce, 0x3e, 0xd1, 0x81, 0xf0),
            LL(0x11, 0x11, 0x44, 0x11, 0x88, 0x55, 0x22, 0x99),
            LL(0x8f, 0x8f, 0x06, 0x8f, 0x0c, 0x89, 0x03, 0x83),
            LL(0x4e, 0x4e, 0x25, 0x4e, 0x4a, 0x6b, 0x9c, 0x04),
            LL(0xb7, 0xb7, 0xe6, 0xb7, 0xd1, 0x51, 0x73, 0x66),
            LL(0xeb, 0xeb, 0x8b, 0xeb, 0x0b, 0x60, 0xcb, 0xe0),
            LL(0x3c, 0x3c, 0xf0, 0x3c, 0xfd, 0xcc, 0x78, 0xc1),
            LL(0x81, 0x81, 0x3e, 0x81, 0x7c, 0xbf, 0x1f, 0xfd),
            LL(0x94, 0x94, 0x6a, 0x94, 0xd4, 0xfe, 0x35, 0x40),
            LL(0xf7, 0xf7, 0xfb, 0xf7, 0xeb, 0x0c, 0xf3, 0x1c),
            LL(0xb9, 0xb9, 0xde, 0xb9, 0xa1, 0x67, 0x6f, 0x18),
            LL(0x13, 0x13, 0x4c, 0x13, 0x98, 0x5f, 0x26, 0x8b),
            LL(0x2c, 0x2c, 0xb0, 0x2c, 0x7d, 0x9c, 0x58, 0x51),
            LL(0xd3, 0xd3, 0x6b, 0xd3, 0xd6, 0xb8, 0xbb, 0x05),
            LL(0xe7, 0xe7, 0xbb, 0xe7, 0x6b, 0x5c, 0xd3, 0x8c),
            LL(0x6e, 0x6e, 0xa5, 0x6e, 0x57, 0xcb, 0xdc, 0x39),
            LL(0xc4, 0xc4, 0x37, 0xc4, 0x6e, 0xf3, 0x95, 0xaa),
            LL(0x03, 0x03, 0x0c, 0x03, 0x18, 0x0f, 0x06, 0x1b),
            LL(0x56, 0x56, 0x45, 0x56, 0x8a, 0x13, 0xac, 0xdc),
            LL(0x44, 0x44, 0x0d, 0x44, 0x1a, 0x49, 0x88, 0x5e),
            LL(0x7f, 0x7f, 0xe1, 0x7f, 0xdf, 0x9e, 0xfe, 0xa0),
            LL(0xa9, 0xa9, 0x9e, 0xa9, 0x21, 0x37, 0x4f, 0x88),
            LL(0x2a, 0x2a, 0xa8, 0x2a, 0x4d, 0x82, 0x54, 0x67),
            LL(0xbb, 0xbb, 0xd6, 0xbb, 0xb1, 0x6d, 0x6b, 0x0a),
            LL(0xc1, 0xc1, 0x23, 0xc1, 0x46, 0xe2, 0x9f, 0x87),
            LL(0x53, 0x53, 0x51, 0x53, 0xa2, 0x02, 0xa6, 0xf1),
            LL(0xdc, 0xdc, 0x57, 0xdc, 0xae, 0x8b, 0xa5, 0x72),
            LL(0x0b, 0x0b, 0x2c, 0x0b, 0x58, 0x27, 0x16, 0x53),
            LL(0x9d, 0x9d, 0x4e, 0x9d, 0x9c, 0xd3, 0x27, 0x01),
            LL(0x6c, 0x6c, 0xad, 0x6c, 0x47, 0xc1, 0xd8, 0x2b),
            LL(0x31, 0x31, 0xc4, 0x31, 0x95, 0xf5, 0x62, 0xa4),
            LL(0x74, 0x74, 0xcd, 0x74, 0x87, 0xb9, 0xe8, 0xf3),
            LL(0xf6, 0xf6, 0xff, 0xf6, 0xe3, 0x09, 0xf1, 0x15),
            LL(0x46, 0x46, 0x05, 0x46, 0x0a, 0x43, 0x8c, 0x4c),
            LL(0xac, 0xac, 0x8a, 0xac, 0x09, 0x26, 0x45, 0xa5),
            LL(0x89, 0x89, 0x1e, 0x89, 0x3c, 0x97, 0x0f, 0xb5),
            LL(0x14, 0x14, 0x50, 0x14, 0xa0, 0x44, 0x28, 0xb4),
            LL(0xe1, 0xe1, 0xa3, 0xe1, 0x5b, 0x42, 0xdf, 0xba),
            LL(0x16, 0x16, 0x58, 0x16, 0xb0, 0x4e, 0x2c, 0xa6),
            LL(0x3a, 0x3a, 0xe8, 0x3a, 0xcd, 0xd2, 0x74, 0xf7),
            LL(0x69, 0x69, 0xb9, 0x69, 0x6f, 0xd0, 0xd2, 0x06),
            LL(0x09, 0x09, 0x24, 0x09, 0x48, 0x2d, 0x12, 0x41),
            LL(0x70, 0x70, 0xdd, 0x70, 0xa7, 0xad, 0xe0, 0xd7),
            LL(0xb6, 0xb6, 0xe2, 0xb6, 0xd9, 0x54, 0x71, 0x6f),
            LL(0xd0, 0xd0, 0x67, 0xd0, 0xce, 0xb7, 0xbd, 0x1e),
            LL(0xed, 0xed, 0x93, 0xed, 0x3b, 0x7e, 0xc7, 0xd6),
            LL(0xcc, 0xcc, 0x17, 0xcc, 0x2e, 0xdb, 0x85, 0xe2),
            LL(0x42, 0x42, 0x15, 0x42, 0x2a, 0x57, 0x84, 0x68),
            LL(0x98, 0x98, 0x5a, 0x98, 0xb4, 0xc2, 0x2d, 0x2c),
            LL(0xa4, 0xa4, 0xaa, 0xa4, 0x49, 0x0e, 0x55, 0xed),
            LL(0x28, 0x28, 0xa0, 0x28, 0x5d, 0x88, 0x50, 0x75),
            LL(0x5c, 0x5c, 0x6d, 0x5c, 0xda, 0x31, 0xb8, 0x86),
            LL(0xf8, 0xf8, 0xc7, 0xf8, 0x93, 0x3f, 0xed, 0x6b),
            LL(0x86, 0x86, 0x22, 0x86, 0x44, 0xa4, 0x11, 0xc2),
/* RC: the per-round key constants, stored as raw bytes right after the
 * 256*N table entries.  RC[r] (0 <= r < ROUNDS) is the r-th constant. */
#define RC      (&(Cx.q[256*N]))
            0x18, 0x23, 0xc6, 0xe8, 0x87, 0xb8, 0x01, 0x4f,
            /* rc[ROUNDS] */
            0x36, 0xa6, 0xd2, 0xf5, 0x79, 0x6f, 0x91, 0x52, 0x60, 0xbc, 0x9b,
            0x8e, 0xa3, 0x0c, 0x7b, 0x35, 0x1d, 0xe0, 0xd7, 0xc2, 0x2e, 0x4b,
            0xfe, 0x57, 0x15, 0x77, 0x37, 0xe5, 0x9f, 0xf0, 0x4a, 0xda, 0x58,
            0xc9, 0x29, 0x0a, 0xb1, 0xa0, 0x6b, 0x85, 0xbd, 0x5d, 0x10, 0xf4,
            0xcb, 0x3e, 0x05, 0x67, 0xe4, 0x27, 0x41, 0x8b, 0xa7, 0x7d, 0x95,
            0xd8, 0xfb, 0xee, 0x7c, 0x66, 0xdd, 0x17, 0x47, 0x9e, 0xca, 0x2d,
            0xbf, 0x07, 0xad, 0x5a, 0x83, 0x33
        }
    };
498
499void whirlpool_block(WHIRLPOOL_CTX *ctx, const void *inp, size_t n)
500{
501    int r;
502    const u8 *p = inp;
503    union {
504        u64 q[8];
505        u8 c[64];
506    } S, K, *H = (void *)ctx->H.q;
507
508#ifdef GO_FOR_MMX
509    GO_FOR_MMX(ctx, inp, n);
510#endif
511    do {
512#ifdef OPENSSL_SMALL_FOOTPRINT
513        u64 L[8];
514        int i;
515
516        for (i = 0; i < 64; i++)
517            S.c[i] = (K.c[i] = H->c[i]) ^ p[i];
518        for (r = 0; r < ROUNDS; r++) {
519            for (i = 0; i < 8; i++) {
520                L[i] = i ? 0 : RC[r];
521                L[i] ^= C0(K, i) ^ C1(K, (i - 1) & 7) ^
522                    C2(K, (i - 2) & 7) ^ C3(K, (i - 3) & 7) ^
523                    C4(K, (i - 4) & 7) ^ C5(K, (i - 5) & 7) ^
524                    C6(K, (i - 6) & 7) ^ C7(K, (i - 7) & 7);
525            }
526            memcpy(K.q, L, 64);
527            for (i = 0; i < 8; i++) {
528                L[i] ^= C0(S, i) ^ C1(S, (i - 1) & 7) ^
529                    C2(S, (i - 2) & 7) ^ C3(S, (i - 3) & 7) ^
530                    C4(S, (i - 4) & 7) ^ C5(S, (i - 5) & 7) ^
531                    C6(S, (i - 6) & 7) ^ C7(S, (i - 7) & 7);
532            }
533            memcpy(S.q, L, 64);
534        }
535        for (i = 0; i < 64; i++)
536            H->c[i] ^= S.c[i] ^ p[i];
537#else
538        u64 L0, L1, L2, L3, L4, L5, L6, L7;
539
540# ifdef STRICT_ALIGNMENT
541        if ((size_t)p & 7) {
542            memcpy(S.c, p, 64);
543            S.q[0] ^= (K.q[0] = H->q[0]);
544            S.q[1] ^= (K.q[1] = H->q[1]);
545            S.q[2] ^= (K.q[2] = H->q[2]);
546            S.q[3] ^= (K.q[3] = H->q[3]);
547            S.q[4] ^= (K.q[4] = H->q[4]);
548            S.q[5] ^= (K.q[5] = H->q[5]);
549            S.q[6] ^= (K.q[6] = H->q[6]);
550            S.q[7] ^= (K.q[7] = H->q[7]);
551        } else
552# endif
553        {
554            const u64_aX *pa = (const u64_aX *)p;
555            S.q[0] = (K.q[0] = H->q[0]) ^ pa[0];
556            S.q[1] = (K.q[1] = H->q[1]) ^ pa[1];
557            S.q[2] = (K.q[2] = H->q[2]) ^ pa[2];
558            S.q[3] = (K.q[3] = H->q[3]) ^ pa[3];
559            S.q[4] = (K.q[4] = H->q[4]) ^ pa[4];
560            S.q[5] = (K.q[5] = H->q[5]) ^ pa[5];
561            S.q[6] = (K.q[6] = H->q[6]) ^ pa[6];
562            S.q[7] = (K.q[7] = H->q[7]) ^ pa[7];
563        }
564
565        for (r = 0; r < ROUNDS; r++) {
566# ifdef SMALL_REGISTER_BANK
567            L0 = C0(K, 0) ^ C1(K, 7) ^ C2(K, 6) ^ C3(K, 5) ^
568                C4(K, 4) ^ C5(K, 3) ^ C6(K, 2) ^ C7(K, 1) ^ RC[r];
569            L1 = C0(K, 1) ^ C1(K, 0) ^ C2(K, 7) ^ C3(K, 6) ^
570                C4(K, 5) ^ C5(K, 4) ^ C6(K, 3) ^ C7(K, 2);
571            L2 = C0(K, 2) ^ C1(K, 1) ^ C2(K, 0) ^ C3(K, 7) ^
572                C4(K, 6) ^ C5(K, 5) ^ C6(K, 4) ^ C7(K, 3);
573            L3 = C0(K, 3) ^ C1(K, 2) ^ C2(K, 1) ^ C3(K, 0) ^
574                C4(K, 7) ^ C5(K, 6) ^ C6(K, 5) ^ C7(K, 4);
575            L4 = C0(K, 4) ^ C1(K, 3) ^ C2(K, 2) ^ C3(K, 1) ^
576                C4(K, 0) ^ C5(K, 7) ^ C6(K, 6) ^ C7(K, 5);
577            L5 = C0(K, 5) ^ C1(K, 4) ^ C2(K, 3) ^ C3(K, 2) ^
578                C4(K, 1) ^ C5(K, 0) ^ C6(K, 7) ^ C7(K, 6);
579            L6 = C0(K, 6) ^ C1(K, 5) ^ C2(K, 4) ^ C3(K, 3) ^
580                C4(K, 2) ^ C5(K, 1) ^ C6(K, 0) ^ C7(K, 7);
581            L7 = C0(K, 7) ^ C1(K, 6) ^ C2(K, 5) ^ C3(K, 4) ^
582                C4(K, 3) ^ C5(K, 2) ^ C6(K, 1) ^ C7(K, 0);
583
584            K.q[0] = L0;
585            K.q[1] = L1;
586            K.q[2] = L2;
587            K.q[3] = L3;
588            K.q[4] = L4;
589            K.q[5] = L5;
590            K.q[6] = L6;
591            K.q[7] = L7;
592
593            L0 ^= C0(S, 0) ^ C1(S, 7) ^ C2(S, 6) ^ C3(S, 5) ^
594                C4(S, 4) ^ C5(S, 3) ^ C6(S, 2) ^ C7(S, 1);
595            L1 ^= C0(S, 1) ^ C1(S, 0) ^ C2(S, 7) ^ C3(S, 6) ^
596                C4(S, 5) ^ C5(S, 4) ^ C6(S, 3) ^ C7(S, 2);
597            L2 ^= C0(S, 2) ^ C1(S, 1) ^ C2(S, 0) ^ C3(S, 7) ^
598                C4(S, 6) ^ C5(S, 5) ^ C6(S, 4) ^ C7(S, 3);
599            L3 ^= C0(S, 3) ^ C1(S, 2) ^ C2(S, 1) ^ C3(S, 0) ^
600                C4(S, 7) ^ C5(S, 6) ^ C6(S, 5) ^ C7(S, 4);
601            L4 ^= C0(S, 4) ^ C1(S, 3) ^ C2(S, 2) ^ C3(S, 1) ^
602                C4(S, 0) ^ C5(S, 7) ^ C6(S, 6) ^ C7(S, 5);
603            L5 ^= C0(S, 5) ^ C1(S, 4) ^ C2(S, 3) ^ C3(S, 2) ^
604                C4(S, 1) ^ C5(S, 0) ^ C6(S, 7) ^ C7(S, 6);
605            L6 ^= C0(S, 6) ^ C1(S, 5) ^ C2(S, 4) ^ C3(S, 3) ^
606                C4(S, 2) ^ C5(S, 1) ^ C6(S, 0) ^ C7(S, 7);
607            L7 ^= C0(S, 7) ^ C1(S, 6) ^ C2(S, 5) ^ C3(S, 4) ^
608                C4(S, 3) ^ C5(S, 2) ^ C6(S, 1) ^ C7(S, 0);
609
610            S.q[0] = L0;
611            S.q[1] = L1;
612            S.q[2] = L2;
613            S.q[3] = L3;
614            S.q[4] = L4;
615            S.q[5] = L5;
616            S.q[6] = L6;
617            S.q[7] = L7;
618# else
619            L0 = C0(K, 0);
620            L1 = C1(K, 0);
621            L2 = C2(K, 0);
622            L3 = C3(K, 0);
623            L4 = C4(K, 0);
624            L5 = C5(K, 0);
625            L6 = C6(K, 0);
626            L7 = C7(K, 0);
627            L0 ^= RC[r];
628
629            L1 ^= C0(K, 1);
630            L2 ^= C1(K, 1);
631            L3 ^= C2(K, 1);
632            L4 ^= C3(K, 1);
633            L5 ^= C4(K, 1);
634            L6 ^= C5(K, 1);
635            L7 ^= C6(K, 1);
636            L0 ^= C7(K, 1);
637
638            L2 ^= C0(K, 2);
639            L3 ^= C1(K, 2);
640            L4 ^= C2(K, 2);
641            L5 ^= C3(K, 2);
642            L6 ^= C4(K, 2);
643            L7 ^= C5(K, 2);
644            L0 ^= C6(K, 2);
645            L1 ^= C7(K, 2);
646
647            L3 ^= C0(K, 3);
648            L4 ^= C1(K, 3);
649            L5 ^= C2(K, 3);
650            L6 ^= C3(K, 3);
651            L7 ^= C4(K, 3);
652            L0 ^= C5(K, 3);
653            L1 ^= C6(K, 3);
654            L2 ^= C7(K, 3);
655
656            L4 ^= C0(K, 4);
657            L5 ^= C1(K, 4);
658            L6 ^= C2(K, 4);
659            L7 ^= C3(K, 4);
660            L0 ^= C4(K, 4);
661            L1 ^= C5(K, 4);
662            L2 ^= C6(K, 4);
663            L3 ^= C7(K, 4);
664
665            L5 ^= C0(K, 5);
666            L6 ^= C1(K, 5);
667            L7 ^= C2(K, 5);
668            L0 ^= C3(K, 5);
669            L1 ^= C4(K, 5);
670            L2 ^= C5(K, 5);
671            L3 ^= C6(K, 5);
672            L4 ^= C7(K, 5);
673
674            L6 ^= C0(K, 6);
675            L7 ^= C1(K, 6);
676            L0 ^= C2(K, 6);
677            L1 ^= C3(K, 6);
678            L2 ^= C4(K, 6);
679            L3 ^= C5(K, 6);
680            L4 ^= C6(K, 6);
681            L5 ^= C7(K, 6);
682
683            L7 ^= C0(K, 7);
684            L0 ^= C1(K, 7);
685            L1 ^= C2(K, 7);
686            L2 ^= C3(K, 7);
687            L3 ^= C4(K, 7);
688            L4 ^= C5(K, 7);
689            L5 ^= C6(K, 7);
690            L6 ^= C7(K, 7);
691
692            K.q[0] = L0;
693            K.q[1] = L1;
694            K.q[2] = L2;
695            K.q[3] = L3;
696            K.q[4] = L4;
697            K.q[5] = L5;
698            K.q[6] = L6;
699            K.q[7] = L7;
700
701            L0 ^= C0(S, 0);
702            L1 ^= C1(S, 0);
703            L2 ^= C2(S, 0);
704            L3 ^= C3(S, 0);
705            L4 ^= C4(S, 0);
706            L5 ^= C5(S, 0);
707            L6 ^= C6(S, 0);
708            L7 ^= C7(S, 0);
709
710            L1 ^= C0(S, 1);
711            L2 ^= C1(S, 1);
712            L3 ^= C2(S, 1);
713            L4 ^= C3(S, 1);
714            L5 ^= C4(S, 1);
715            L6 ^= C5(S, 1);
716            L7 ^= C6(S, 1);
717            L0 ^= C7(S, 1);
718
719            L2 ^= C0(S, 2);
720            L3 ^= C1(S, 2);
721            L4 ^= C2(S, 2);
722            L5 ^= C3(S, 2);
723            L6 ^= C4(S, 2);
724            L7 ^= C5(S, 2);
725            L0 ^= C6(S, 2);
726            L1 ^= C7(S, 2);
727
728            L3 ^= C0(S, 3);
729            L4 ^= C1(S, 3);
730            L5 ^= C2(S, 3);
731            L6 ^= C3(S, 3);
732            L7 ^= C4(S, 3);
733            L0 ^= C5(S, 3);
734            L1 ^= C6(S, 3);
735            L2 ^= C7(S, 3);
736
737            L4 ^= C0(S, 4);
738            L5 ^= C1(S, 4);
739            L6 ^= C2(S, 4);
740            L7 ^= C3(S, 4);
741            L0 ^= C4(S, 4);
742            L1 ^= C5(S, 4);
743            L2 ^= C6(S, 4);
744            L3 ^= C7(S, 4);
745
746            L5 ^= C0(S, 5);
747            L6 ^= C1(S, 5);
748            L7 ^= C2(S, 5);
749            L0 ^= C3(S, 5);
750            L1 ^= C4(S, 5);
751            L2 ^= C5(S, 5);
752            L3 ^= C6(S, 5);
753            L4 ^= C7(S, 5);
754
755            L6 ^= C0(S, 6);
756            L7 ^= C1(S, 6);
757            L0 ^= C2(S, 6);
758            L1 ^= C3(S, 6);
759            L2 ^= C4(S, 6);
760            L3 ^= C5(S, 6);
761            L4 ^= C6(S, 6);
762            L5 ^= C7(S, 6);
763
764            L7 ^= C0(S, 7);
765            L0 ^= C1(S, 7);
766            L1 ^= C2(S, 7);
767            L2 ^= C3(S, 7);
768            L3 ^= C4(S, 7);
769            L4 ^= C5(S, 7);
770            L5 ^= C6(S, 7);
771            L6 ^= C7(S, 7);
772
773            S.q[0] = L0;
774            S.q[1] = L1;
775            S.q[2] = L2;
776            S.q[3] = L3;
777            S.q[4] = L4;
778            S.q[5] = L5;
779            S.q[6] = L6;
780            S.q[7] = L7;
781# endif
782        }
783
784# ifdef STRICT_ALIGNMENT
785        if ((size_t)p & 7) {
786            int i;
787            for (i = 0; i < 64; i++)
788                H->c[i] ^= S.c[i] ^ p[i];
789        } else
790# endif
791        {
792            const u64_aX *pa = (const u64_aX *)p;
793            H->q[0] ^= S.q[0] ^ pa[0];
794            H->q[1] ^= S.q[1] ^ pa[1];
795            H->q[2] ^= S.q[2] ^ pa[2];
796            H->q[3] ^= S.q[3] ^ pa[3];
797            H->q[4] ^= S.q[4] ^ pa[4];
798            H->q[5] ^= S.q[5] ^ pa[5];
799            H->q[6] ^= S.q[6] ^ pa[6];
800            H->q[7] ^= S.q[7] ^ pa[7];
801        }
802#endif
803        p += 64;
804    } while (--n);
805}
806