/**
 * The Whirlpool hashing function.
 *
 * <P>
 * <b>References</b>
 *
 * <P>
 * The Whirlpool algorithm was developed by
 * <a href="mailto:pbarreto@scopus.com.br">Paulo S. L. M. Barreto</a> and
 * <a href="mailto:vincent.rijmen@cryptomathic.com">Vincent Rijmen</a>.
 *
 * See
 *      P.S.L.M. Barreto, V. Rijmen,
 *      ``The Whirlpool hashing function,''
 *      NESSIE submission, 2000 (tweaked version, 2001),
 *      <https://www.cosic.esat.kuleuven.ac.be/nessie/workshop/submissions/whirlpool.zip>
 *
 * Based on "@version 3.0 (2003.03.12)" by Paulo S.L.M. Barreto and
 * Vincent Rijmen. Look up the "reference implementations" at
 * <http://planeta.terra.com.br/informatica/paulobarreto/>
 *
 * =============================================================================
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include "wp_locl.h"
#include <string.h>

typedef unsigned char		u8;
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
typedef unsigned __int64	u64;
#elif defined(__arch64__)
typedef unsigned long		u64;
#else
typedef unsigned long long	u64;
#endif
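
/*
 * Everything below assumes that u64 is exactly 64 bits wide. A port
 * to an unlisted platform could verify this with a compile-time
 * check along these (illustrative) lines:
 *
 *	typedef char u64_must_be_8_bytes[sizeof(u64)==8 ? 1 : -1];
 *
 * The array size turns negative, and compilation fails, whenever
 * sizeof(u64) != 8.
 */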

#define ROUNDS	10

#define STRICT_ALIGNMENT
#if defined(__i386) || defined(__i386__) || \
    defined(__x86_64) || defined(__x86_64__) || \
    defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)
/* Well, formally there are a couple of other architectures that
 * permit unaligned loads, specifically those not crossing cache
 * lines, IA-64 and PowerPC... */
#  undef STRICT_ALIGNMENT
#endif

#undef SMALL_REGISTER_BANK
#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
#  define SMALL_REGISTER_BANK
#  if defined(WHIRLPOOL_ASM)
#    ifndef OPENSSL_SMALL_FOOTPRINT
#      define OPENSSL_SMALL_FOOTPRINT	/* it appears that for older non-MMX
					   CPUs this is actually faster! */
#    endif
#    define GO_FOR_MMX(ctx,inp,num)	do {			\
	extern unsigned int OPENSSL_ia32cap_P[];		\
	void whirlpool_block_mmx(void *,const void *,size_t);	\
	if (!(OPENSSL_ia32cap_P[0] & (1<<23)))	break;		\
	whirlpool_block_mmx(ctx->H.c,inp,num);	return;		\
					} while (0)
#  endif
#endif
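
/*
 * GO_FOR_MMX is a run-time dispatch. OPENSSL_ia32cap_P[0] mirrors the
 * EDX word returned by CPUID leaf 1, whose bit 23 is the MMX feature
 * flag, so a WHIRLPOOL_ASM build of whirlpool_block() effectively
 * starts out as the following sketch:
 *
 *	if (OPENSSL_ia32cap_P[0] & (1<<23)) {
 *		whirlpool_block_mmx(ctx->H.c,inp,num);
 *		return;
 *	}
 *	... otherwise fall through to the portable C code below ...
 */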

#undef ROTATE
#if defined(_MSC_VER)
#  if defined(_WIN64)	/* applies to both IA-64 and AMD64 */
#    pragma intrinsic(_rotl64)
#    define ROTATE(a,n)	_rotl64((a),n)
#  endif
#elif defined(__GNUC__) && __GNUC__>=2
#  if defined(__x86_64) || defined(__x86_64__)
#    if defined(L_ENDIAN)
#      define ROTATE(a,n)	({ u64 ret; asm ("rolq %1,%0"	\
				   : "=r"(ret) : "J"(n),"0"(a) : "cc"); ret; })
#    elif defined(B_ENDIAN)
       /* Most will argue that x86_64 is always little-endian. Well,
        * yes, but then we have stratus.com, which has modified gcc to
        * "emulate" big-endian on x86. Is there evidence that they
        * [or somebody else] won't do the same for x86_64? Naturally
        * not. So this line is waiting, ready for that brave soul:-) */
#      define ROTATE(a,n)	({ u64 ret; asm ("rorq %1,%0"	\
				   : "=r"(ret) : "J"(n),"0"(a) : "cc"); ret; })
#    endif
#  elif defined(__ia64) || defined(__ia64__)
#    if defined(L_ENDIAN)
#      define ROTATE(a,n)	({ u64 ret; asm ("shrp %0=%1,%1,%2"	\
				   : "=r"(ret) : "r"(a),"M"(64-(n))); ret; })
#    elif defined(B_ENDIAN)
#      define ROTATE(a,n)	({ u64 ret; asm ("shrp %0=%1,%1,%2"	\
				   : "=r"(ret) : "r"(a),"M"(n)); ret; })
#    endif
#  endif
#endif

#if defined(OPENSSL_SMALL_FOOTPRINT)
#  if !defined(ROTATE)
#    if defined(L_ENDIAN)	/* little-endians have to rotate left */
#      define ROTATE(i,n)	((i)<<(n) ^ (i)>>(64-(n)))
#    elif defined(B_ENDIAN)	/* big-endians have to rotate right */
#      define ROTATE(i,n)	((i)>>(n) ^ (i)<<(64-(n)))
#    endif
#  endif
#  if defined(ROTATE) && !defined(STRICT_ALIGNMENT)
#    define STRICT_ALIGNMENT	/* ensure smallest table size */
#  endif
#endif
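
/*
 * Why the rotation direction depends on byte order: consecutive rows
 * of the circulant table below differ by moving every byte one
 * position up in memory. On a little-endian CPU that is a shift
 * towards the more significant end, i.e. a left rotation
 * ((i)<<8 ^ (i)>>56), while on a big-endian CPU the very same byte
 * movement is a right rotation.
 */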

/*
 * Table size depends on STRICT_ALIGNMENT and on whether an endian-
 * specific ROTATE macro is defined. If STRICT_ALIGNMENT is not
 * defined, which is normally the case on x86[_64] CPUs, the table is
 * 4KB large unconditionally. Otherwise, if ROTATE is defined, the
 * table is 2KB large, and otherwise it is 16KB. The 2KB table costs a
 * whole bunch of additional rotations, but I'm willing to "trade,"
 * because a 16KB table certainly trashes the L1 cache. I wish all
 * CPUs could handle unaligned loads, as the 4KB table neither trashes
 * the cache nor requires additional rotations.
 */
/*
 * Note that every Cn macro expands as two loads: one byte load and
 * one quadword load. One can argue that that many single-byte loads
 * is excessive, as one could instead load a quadword and "milk" it
 * for eight 8-bit values. Well, yes, but in order to do so *and*
 * avoid excessive loads, you have to accommodate a handful of 64-bit
 * values in the register bank and issue a bunch of shifts and masks.
 * It's a trade-off: loads versus shifts and masks in a big register
 * bank[!]. On most CPUs eight single-byte loads are faster, and for
 * the other CPUs I rely on a smart compiler to fold byte loads where
 * beneficial. Hand-coded assembler would be another alternative:-)
 */
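
/*
 * For concreteness, this is what the second-column lookup C1(K,i)
 * expands to in each of the three configurations defined below; in
 * every case the table index is the single byte K.c[(i)*8+1]:
 *
 *	2KB table (STRICT_ALIGNMENT and ROTATE):
 *		ROTATE(Cx.q[K.c[(i)*8+1]],8)
 *	16KB table (STRICT_ALIGNMENT, no ROTATE):
 *		Cx.q[1+8*K.c[(i)*8+1]]
 *	4KB table (no STRICT_ALIGNMENT):
 *		((u64*)(Cx.c+7))[2*K.c[(i)*8+1]]
 */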
#ifdef STRICT_ALIGNMENT
#  if defined(ROTATE)
#    define N	1
#    define LL(c0,c1,c2,c3,c4,c5,c6,c7)	c0,c1,c2,c3,c4,c5,c6,c7
#    define C0(K,i)	(Cx.q[K.c[(i)*8+0]])
#    define C1(K,i)	ROTATE(Cx.q[K.c[(i)*8+1]],8)
#    define C2(K,i)	ROTATE(Cx.q[K.c[(i)*8+2]],16)
#    define C3(K,i)	ROTATE(Cx.q[K.c[(i)*8+3]],24)
#    define C4(K,i)	ROTATE(Cx.q[K.c[(i)*8+4]],32)
#    define C5(K,i)	ROTATE(Cx.q[K.c[(i)*8+5]],40)
#    define C6(K,i)	ROTATE(Cx.q[K.c[(i)*8+6]],48)
#    define C7(K,i)	ROTATE(Cx.q[K.c[(i)*8+7]],56)
#  else
#    define N	8
#    define LL(c0,c1,c2,c3,c4,c5,c6,c7)	c0,c1,c2,c3,c4,c5,c6,c7, \
					c7,c0,c1,c2,c3,c4,c5,c6, \
					c6,c7,c0,c1,c2,c3,c4,c5, \
					c5,c6,c7,c0,c1,c2,c3,c4, \
					c4,c5,c6,c7,c0,c1,c2,c3, \
					c3,c4,c5,c6,c7,c0,c1,c2, \
					c2,c3,c4,c5,c6,c7,c0,c1, \
					c1,c2,c3,c4,c5,c6,c7,c0
#    define C0(K,i)	(Cx.q[0+8*K.c[(i)*8+0]])
#    define C1(K,i)	(Cx.q[1+8*K.c[(i)*8+1]])
#    define C2(K,i)	(Cx.q[2+8*K.c[(i)*8+2]])
#    define C3(K,i)	(Cx.q[3+8*K.c[(i)*8+3]])
#    define C4(K,i)	(Cx.q[4+8*K.c[(i)*8+4]])
#    define C5(K,i)	(Cx.q[5+8*K.c[(i)*8+5]])
#    define C6(K,i)	(Cx.q[6+8*K.c[(i)*8+6]])
#    define C7(K,i)	(Cx.q[7+8*K.c[(i)*8+7]])
#  endif
#else
#  define N	2
#  define LL(c0,c1,c2,c3,c4,c5,c6,c7)	c0,c1,c2,c3,c4,c5,c6,c7, \
					c0,c1,c2,c3,c4,c5,c6,c7
#  define C0(K,i)	(((u64*)(Cx.c+0))[2*K.c[(i)*8+0]])
#  define C1(K,i)	(((u64*)(Cx.c+7))[2*K.c[(i)*8+1]])
#  define C2(K,i)	(((u64*)(Cx.c+6))[2*K.c[(i)*8+2]])
#  define C3(K,i)	(((u64*)(Cx.c+5))[2*K.c[(i)*8+3]])
#  define C4(K,i)	(((u64*)(Cx.c+4))[2*K.c[(i)*8+4]])
#  define C5(K,i)	(((u64*)(Cx.c+3))[2*K.c[(i)*8+5]])
#  define C6(K,i)	(((u64*)(Cx.c+2))[2*K.c[(i)*8+6]])
#  define C7(K,i)	(((u64*)(Cx.c+1))[2*K.c[(i)*8+7]])
#endif
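
/*
 * The 4KB variant stores every quadword twice in a row (N is 2), so
 * the eight overlapping, byte-misaligned views Cx.c+0 through Cx.c+7
 * yield the same table entry pre-rotated by 0..7 bytes without any
 * run-time rotation. This is exactly why the variant is reserved for
 * the !STRICT_ALIGNMENT case: it relies on unaligned 64-bit loads.
 */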

static const
union	{
	u8	c[(256*N+ROUNDS)*sizeof(u64)];
	u64	q[(256*N+ROUNDS)];
	} Cx = { {
	/* Note endian-neutral representation:-) */
	LL(0x18,0x18,0x60,0x18,0xc0,0x78,0x30,0xd8),
	LL(0x23,0x23,0x8c,0x23,0x05,0xaf,0x46,0x26),
	LL(0xc6,0xc6,0x3f,0xc6,0x7e,0xf9,0x91,0xb8),
	LL(0xe8,0xe8,0x87,0xe8,0x13,0x6f,0xcd,0xfb),
	LL(0x87,0x87,0x26,0x87,0x4c,0xa1,0x13,0xcb),
	LL(0xb8,0xb8,0xda,0xb8,0xa9,0x62,0x6d,0x11),
	LL(0x01,0x01,0x04,0x01,0x08,0x05,0x02,0x09),
	LL(0x4f,0x4f,0x21,0x4f,0x42,0x6e,0x9e,0x0d),
	LL(0x36,0x36,0xd8,0x36,0xad,0xee,0x6c,0x9b),
	LL(0xa6,0xa6,0xa2,0xa6,0x59,0x04,0x51,0xff),
	LL(0xd2,0xd2,0x6f,0xd2,0xde,0xbd,0xb9,0x0c),
	LL(0xf5,0xf5,0xf3,0xf5,0xfb,0x06,0xf7,0x0e),
	LL(0x79,0x79,0xf9,0x79,0xef,0x80,0xf2,0x96),
	LL(0x6f,0x6f,0xa1,0x6f,0x5f,0xce,0xde,0x30),
	LL(0x91,0x91,0x7e,0x91,0xfc,0xef,0x3f,0x6d),
	LL(0x52,0x52,0x55,0x52,0xaa,0x07,0xa4,0xf8),
	LL(0x60,0x60,0x9d,0x60,0x27,0xfd,0xc0,0x47),
	LL(0xbc,0xbc,0xca,0xbc,0x89,0x76,0x65,0x35),
	LL(0x9b,0x9b,0x56,0x9b,0xac,0xcd,0x2b,0x37),
	LL(0x8e,0x8e,0x02,0x8e,0x04,0x8c,0x01,0x8a),
	LL(0xa3,0xa3,0xb6,0xa3,0x71,0x15,0x5b,0xd2),
	LL(0x0c,0x0c,0x30,0x0c,0x60,0x3c,0x18,0x6c),
	LL(0x7b,0x7b,0xf1,0x7b,0xff,0x8a,0xf6,0x84),
	LL(0x35,0x35,0xd4,0x35,0xb5,0xe1,0x6a,0x80),
	LL(0x1d,0x1d,0x74,0x1d,0xe8,0x69,0x3a,0xf5),
	LL(0xe0,0xe0,0xa7,0xe0,0x53,0x47,0xdd,0xb3),
	LL(0xd7,0xd7,0x7b,0xd7,0xf6,0xac,0xb3,0x21),
	LL(0xc2,0xc2,0x2f,0xc2,0x5e,0xed,0x99,0x9c),
	LL(0x2e,0x2e,0xb8,0x2e,0x6d,0x96,0x5c,0x43),
	LL(0x4b,0x4b,0x31,0x4b,0x62,0x7a,0x96,0x29),
	LL(0xfe,0xfe,0xdf,0xfe,0xa3,0x21,0xe1,0x5d),
	LL(0x57,0x57,0x41,0x57,0x82,0x16,0xae,0xd5),
	LL(0x15,0x15,0x54,0x15,0xa8,0x41,0x2a,0xbd),
	LL(0x77,0x77,0xc1,0x77,0x9f,0xb6,0xee,0xe8),
	LL(0x37,0x37,0xdc,0x37,0xa5,0xeb,0x6e,0x92),
	LL(0xe5,0xe5,0xb3,0xe5,0x7b,0x56,0xd7,0x9e),
	LL(0x9f,0x9f,0x46,0x9f,0x8c,0xd9,0x23,0x13),
	LL(0xf0,0xf0,0xe7,0xf0,0xd3,0x17,0xfd,0x23),
	LL(0x4a,0x4a,0x35,0x4a,0x6a,0x7f,0x94,0x20),
	LL(0xda,0xda,0x4f,0xda,0x9e,0x95,0xa9,0x44),
	LL(0x58,0x58,0x7d,0x58,0xfa,0x25,0xb0,0xa2),
	LL(0xc9,0xc9,0x03,0xc9,0x06,0xca,0x8f,0xcf),
	LL(0x29,0x29,0xa4,0x29,0x55,0x8d,0x52,0x7c),
	LL(0x0a,0x0a,0x28,0x0a,0x50,0x22,0x14,0x5a),
	LL(0xb1,0xb1,0xfe,0xb1,0xe1,0x4f,0x7f,0x50),
	LL(0xa0,0xa0,0xba,0xa0,0x69,0x1a,0x5d,0xc9),
	LL(0x6b,0x6b,0xb1,0x6b,0x7f,0xda,0xd6,0x14),
	LL(0x85,0x85,0x2e,0x85,0x5c,0xab,0x17,0xd9),
	LL(0xbd,0xbd,0xce,0xbd,0x81,0x73,0x67,0x3c),
	LL(0x5d,0x5d,0x69,0x5d,0xd2,0x34,0xba,0x8f),
	LL(0x10,0x10,0x40,0x10,0x80,0x50,0x20,0x90),
	LL(0xf4,0xf4,0xf7,0xf4,0xf3,0x03,0xf5,0x07),
	LL(0xcb,0xcb,0x0b,0xcb,0x16,0xc0,0x8b,0xdd),
	LL(0x3e,0x3e,0xf8,0x3e,0xed,0xc6,0x7c,0xd3),
	LL(0x05,0x05,0x14,0x05,0x28,0x11,0x0a,0x2d),
	LL(0x67,0x67,0x81,0x67,0x1f,0xe6,0xce,0x78),
	LL(0xe4,0xe4,0xb7,0xe4,0x73,0x53,0xd5,0x97),
	LL(0x27,0x27,0x9c,0x27,0x25,0xbb,0x4e,0x02),
	LL(0x41,0x41,0x19,0x41,0x32,0x58,0x82,0x73),
	LL(0x8b,0x8b,0x16,0x8b,0x2c,0x9d,0x0b,0xa7),
	LL(0xa7,0xa7,0xa6,0xa7,0x51,0x01,0x53,0xf6),
	LL(0x7d,0x7d,0xe9,0x7d,0xcf,0x94,0xfa,0xb2),
	LL(0x95,0x95,0x6e,0x95,0xdc,0xfb,0x37,0x49),
	LL(0xd8,0xd8,0x47,0xd8,0x8e,0x9f,0xad,0x56),
	LL(0xfb,0xfb,0xcb,0xfb,0x8b,0x30,0xeb,0x70),
	LL(0xee,0xee,0x9f,0xee,0x23,0x71,0xc1,0xcd),
	LL(0x7c,0x7c,0xed,0x7c,0xc7,0x91,0xf8,0xbb),
	LL(0x66,0x66,0x85,0x66,0x17,0xe3,0xcc,0x71),
	LL(0xdd,0xdd,0x53,0xdd,0xa6,0x8e,0xa7,0x7b),
	LL(0x17,0x17,0x5c,0x17,0xb8,0x4b,0x2e,0xaf),
	LL(0x47,0x47,0x01,0x47,0x02,0x46,0x8e,0x45),
	LL(0x9e,0x9e,0x42,0x9e,0x84,0xdc,0x21,0x1a),
	LL(0xca,0xca,0x0f,0xca,0x1e,0xc5,0x89,0xd4),
	LL(0x2d,0x2d,0xb4,0x2d,0x75,0x99,0x5a,0x58),
	LL(0xbf,0xbf,0xc6,0xbf,0x91,0x79,0x63,0x2e),
	LL(0x07,0x07,0x1c,0x07,0x38,0x1b,0x0e,0x3f),
	LL(0xad,0xad,0x8e,0xad,0x01,0x23,0x47,0xac),
	LL(0x5a,0x5a,0x75,0x5a,0xea,0x2f,0xb4,0xb0),
	LL(0x83,0x83,0x36,0x83,0x6c,0xb5,0x1b,0xef),
	LL(0x33,0x33,0xcc,0x33,0x85,0xff,0x66,0xb6),
	LL(0x63,0x63,0x91,0x63,0x3f,0xf2,0xc6,0x5c),
	LL(0x02,0x02,0x08,0x02,0x10,0x0a,0x04,0x12),
	LL(0xaa,0xaa,0x92,0xaa,0x39,0x38,0x49,0x93),
	LL(0x71,0x71,0xd9,0x71,0xaf,0xa8,0xe2,0xde),
	LL(0xc8,0xc8,0x07,0xc8,0x0e,0xcf,0x8d,0xc6),
	LL(0x19,0x19,0x64,0x19,0xc8,0x7d,0x32,0xd1),
	LL(0x49,0x49,0x39,0x49,0x72,0x70,0x92,0x3b),
	LL(0xd9,0xd9,0x43,0xd9,0x86,0x9a,0xaf,0x5f),
	LL(0xf2,0xf2,0xef,0xf2,0xc3,0x1d,0xf9,0x31),
	LL(0xe3,0xe3,0xab,0xe3,0x4b,0x48,0xdb,0xa8),
	LL(0x5b,0x5b,0x71,0x5b,0xe2,0x2a,0xb6,0xb9),
	LL(0x88,0x88,0x1a,0x88,0x34,0x92,0x0d,0xbc),
	LL(0x9a,0x9a,0x52,0x9a,0xa4,0xc8,0x29,0x3e),
	LL(0x26,0x26,0x98,0x26,0x2d,0xbe,0x4c,0x0b),
	LL(0x32,0x32,0xc8,0x32,0x8d,0xfa,0x64,0xbf),
	LL(0xb0,0xb0,0xfa,0xb0,0xe9,0x4a,0x7d,0x59),
	LL(0xe9,0xe9,0x83,0xe9,0x1b,0x6a,0xcf,0xf2),
	LL(0x0f,0x0f,0x3c,0x0f,0x78,0x33,0x1e,0x77),
	LL(0xd5,0xd5,0x73,0xd5,0xe6,0xa6,0xb7,0x33),
	LL(0x80,0x80,0x3a,0x80,0x74,0xba,0x1d,0xf4),
	LL(0xbe,0xbe,0xc2,0xbe,0x99,0x7c,0x61,0x27),
	LL(0xcd,0xcd,0x13,0xcd,0x26,0xde,0x87,0xeb),
	LL(0x34,0x34,0xd0,0x34,0xbd,0xe4,0x68,0x89),
	LL(0x48,0x48,0x3d,0x48,0x7a,0x75,0x90,0x32),
	LL(0xff,0xff,0xdb,0xff,0xab,0x24,0xe3,0x54),
	LL(0x7a,0x7a,0xf5,0x7a,0xf7,0x8f,0xf4,0x8d),
	LL(0x90,0x90,0x7a,0x90,0xf4,0xea,0x3d,0x64),
	LL(0x5f,0x5f,0x61,0x5f,0xc2,0x3e,0xbe,0x9d),
	LL(0x20,0x20,0x80,0x20,0x1d,0xa0,0x40,0x3d),
	LL(0x68,0x68,0xbd,0x68,0x67,0xd5,0xd0,0x0f),
	LL(0x1a,0x1a,0x68,0x1a,0xd0,0x72,0x34,0xca),
	LL(0xae,0xae,0x82,0xae,0x19,0x2c,0x41,0xb7),
	LL(0xb4,0xb4,0xea,0xb4,0xc9,0x5e,0x75,0x7d),
	LL(0x54,0x54,0x4d,0x54,0x9a,0x19,0xa8,0xce),
	LL(0x93,0x93,0x76,0x93,0xec,0xe5,0x3b,0x7f),
	LL(0x22,0x22,0x88,0x22,0x0d,0xaa,0x44,0x2f),
	LL(0x64,0x64,0x8d,0x64,0x07,0xe9,0xc8,0x63),
	LL(0xf1,0xf1,0xe3,0xf1,0xdb,0x12,0xff,0x2a),
	LL(0x73,0x73,0xd1,0x73,0xbf,0xa2,0xe6,0xcc),
	LL(0x12,0x12,0x48,0x12,0x90,0x5a,0x24,0x82),
	LL(0x40,0x40,0x1d,0x40,0x3a,0x5d,0x80,0x7a),
	LL(0x08,0x08,0x20,0x08,0x40,0x28,0x10,0x48),
	LL(0xc3,0xc3,0x2b,0xc3,0x56,0xe8,0x9b,0x95),
	LL(0xec,0xec,0x97,0xec,0x33,0x7b,0xc5,0xdf),
	LL(0xdb,0xdb,0x4b,0xdb,0x96,0x90,0xab,0x4d),
	LL(0xa1,0xa1,0xbe,0xa1,0x61,0x1f,0x5f,0xc0),
	LL(0x8d,0x8d,0x0e,0x8d,0x1c,0x83,0x07,0x91),
	LL(0x3d,0x3d,0xf4,0x3d,0xf5,0xc9,0x7a,0xc8),
	LL(0x97,0x97,0x66,0x97,0xcc,0xf1,0x33,0x5b),
	LL(0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00),
	LL(0xcf,0xcf,0x1b,0xcf,0x36,0xd4,0x83,0xf9),
	LL(0x2b,0x2b,0xac,0x2b,0x45,0x87,0x56,0x6e),
	LL(0x76,0x76,0xc5,0x76,0x97,0xb3,0xec,0xe1),
	LL(0x82,0x82,0x32,0x82,0x64,0xb0,0x19,0xe6),
	LL(0xd6,0xd6,0x7f,0xd6,0xfe,0xa9,0xb1,0x28),
	LL(0x1b,0x1b,0x6c,0x1b,0xd8,0x77,0x36,0xc3),
	LL(0xb5,0xb5,0xee,0xb5,0xc1,0x5b,0x77,0x74),
	LL(0xaf,0xaf,0x86,0xaf,0x11,0x29,0x43,0xbe),
	LL(0x6a,0x6a,0xb5,0x6a,0x77,0xdf,0xd4,0x1d),
	LL(0x50,0x50,0x5d,0x50,0xba,0x0d,0xa0,0xea),
	LL(0x45,0x45,0x09,0x45,0x12,0x4c,0x8a,0x57),
	LL(0xf3,0xf3,0xeb,0xf3,0xcb,0x18,0xfb,0x38),
	LL(0x30,0x30,0xc0,0x30,0x9d,0xf0,0x60,0xad),
	LL(0xef,0xef,0x9b,0xef,0x2b,0x74,0xc3,0xc4),
	LL(0x3f,0x3f,0xfc,0x3f,0xe5,0xc3,0x7e,0xda),
	LL(0x55,0x55,0x49,0x55,0x92,0x1c,0xaa,0xc7),
	LL(0xa2,0xa2,0xb2,0xa2,0x79,0x10,0x59,0xdb),
	LL(0xea,0xea,0x8f,0xea,0x03,0x65,0xc9,0xe9),
	LL(0x65,0x65,0x89,0x65,0x0f,0xec,0xca,0x6a),
	LL(0xba,0xba,0xd2,0xba,0xb9,0x68,0x69,0x03),
	LL(0x2f,0x2f,0xbc,0x2f,0x65,0x93,0x5e,0x4a),
	LL(0xc0,0xc0,0x27,0xc0,0x4e,0xe7,0x9d,0x8e),
	LL(0xde,0xde,0x5f,0xde,0xbe,0x81,0xa1,0x60),
	LL(0x1c,0x1c,0x70,0x1c,0xe0,0x6c,0x38,0xfc),
	LL(0xfd,0xfd,0xd3,0xfd,0xbb,0x2e,0xe7,0x46),
	LL(0x4d,0x4d,0x29,0x4d,0x52,0x64,0x9a,0x1f),
	LL(0x92,0x92,0x72,0x92,0xe4,0xe0,0x39,0x76),
	LL(0x75,0x75,0xc9,0x75,0x8f,0xbc,0xea,0xfa),
	LL(0x06,0x06,0x18,0x06,0x30,0x1e,0x0c,0x36),
	LL(0x8a,0x8a,0x12,0x8a,0x24,0x98,0x09,0xae),
	LL(0xb2,0xb2,0xf2,0xb2,0xf9,0x40,0x79,0x4b),
	LL(0xe6,0xe6,0xbf,0xe6,0x63,0x59,0xd1,0x85),
	LL(0x0e,0x0e,0x38,0x0e,0x70,0x36,0x1c,0x7e),
	LL(0x1f,0x1f,0x7c,0x1f,0xf8,0x63,0x3e,0xe7),
	LL(0x62,0x62,0x95,0x62,0x37,0xf7,0xc4,0x55),
	LL(0xd4,0xd4,0x77,0xd4,0xee,0xa3,0xb5,0x3a),
	LL(0xa8,0xa8,0x9a,0xa8,0x29,0x32,0x4d,0x81),
	LL(0x96,0x96,0x62,0x96,0xc4,0xf4,0x31,0x52),
	LL(0xf9,0xf9,0xc3,0xf9,0x9b,0x3a,0xef,0x62),
	LL(0xc5,0xc5,0x33,0xc5,0x66,0xf6,0x97,0xa3),
	LL(0x25,0x25,0x94,0x25,0x35,0xb1,0x4a,0x10),
	LL(0x59,0x59,0x79,0x59,0xf2,0x20,0xb2,0xab),
	LL(0x84,0x84,0x2a,0x84,0x54,0xae,0x15,0xd0),
	LL(0x72,0x72,0xd5,0x72,0xb7,0xa7,0xe4,0xc5),
	LL(0x39,0x39,0xe4,0x39,0xd5,0xdd,0x72,0xec),
	LL(0x4c,0x4c,0x2d,0x4c,0x5a,0x61,0x98,0x16),
	LL(0x5e,0x5e,0x65,0x5e,0xca,0x3b,0xbc,0x94),
	LL(0x78,0x78,0xfd,0x78,0xe7,0x85,0xf0,0x9f),
	LL(0x38,0x38,0xe0,0x38,0xdd,0xd8,0x70,0xe5),
	LL(0x8c,0x8c,0x0a,0x8c,0x14,0x86,0x05,0x98),
	LL(0xd1,0xd1,0x63,0xd1,0xc6,0xb2,0xbf,0x17),
	LL(0xa5,0xa5,0xae,0xa5,0x41,0x0b,0x57,0xe4),
	LL(0xe2,0xe2,0xaf,0xe2,0x43,0x4d,0xd9,0xa1),
	LL(0x61,0x61,0x99,0x61,0x2f,0xf8,0xc2,0x4e),
	LL(0xb3,0xb3,0xf6,0xb3,0xf1,0x45,0x7b,0x42),
	LL(0x21,0x21,0x84,0x21,0x15,0xa5,0x42,0x34),
	LL(0x9c,0x9c,0x4a,0x9c,0x94,0xd6,0x25,0x08),
	LL(0x1e,0x1e,0x78,0x1e,0xf0,0x66,0x3c,0xee),
	LL(0x43,0x43,0x11,0x43,0x22,0x52,0x86,0x61),
	LL(0xc7,0xc7,0x3b,0xc7,0x76,0xfc,0x93,0xb1),
	LL(0xfc,0xfc,0xd7,0xfc,0xb3,0x2b,0xe5,0x4f),
	LL(0x04,0x04,0x10,0x04,0x20,0x14,0x08,0x24),
	LL(0x51,0x51,0x59,0x51,0xb2,0x08,0xa2,0xe3),
	LL(0x99,0x99,0x5e,0x99,0xbc,0xc7,0x2f,0x25),
	LL(0x6d,0x6d,0xa9,0x6d,0x4f,0xc4,0xda,0x22),
	LL(0x0d,0x0d,0x34,0x0d,0x68,0x39,0x1a,0x65),
	LL(0xfa,0xfa,0xcf,0xfa,0x83,0x35,0xe9,0x79),
	LL(0xdf,0xdf,0x5b,0xdf,0xb6,0x84,0xa3,0x69),
	LL(0x7e,0x7e,0xe5,0x7e,0xd7,0x9b,0xfc,0xa9),
	LL(0x24,0x24,0x90,0x24,0x3d,0xb4,0x48,0x19),
	LL(0x3b,0x3b,0xec,0x3b,0xc5,0xd7,0x76,0xfe),
	LL(0xab,0xab,0x96,0xab,0x31,0x3d,0x4b,0x9a),
	LL(0xce,0xce,0x1f,0xce,0x3e,0xd1,0x81,0xf0),
	LL(0x11,0x11,0x44,0x11,0x88,0x55,0x22,0x99),
	LL(0x8f,0x8f,0x06,0x8f,0x0c,0x89,0x03,0x83),
	LL(0x4e,0x4e,0x25,0x4e,0x4a,0x6b,0x9c,0x04),
	LL(0xb7,0xb7,0xe6,0xb7,0xd1,0x51,0x73,0x66),
	LL(0xeb,0xeb,0x8b,0xeb,0x0b,0x60,0xcb,0xe0),
	LL(0x3c,0x3c,0xf0,0x3c,0xfd,0xcc,0x78,0xc1),
	LL(0x81,0x81,0x3e,0x81,0x7c,0xbf,0x1f,0xfd),
	LL(0x94,0x94,0x6a,0x94,0xd4,0xfe,0x35,0x40),
	LL(0xf7,0xf7,0xfb,0xf7,0xeb,0x0c,0xf3,0x1c),
	LL(0xb9,0xb9,0xde,0xb9,0xa1,0x67,0x6f,0x18),
	LL(0x13,0x13,0x4c,0x13,0x98,0x5f,0x26,0x8b),
	LL(0x2c,0x2c,0xb0,0x2c,0x7d,0x9c,0x58,0x51),
	LL(0xd3,0xd3,0x6b,0xd3,0xd6,0xb8,0xbb,0x05),
	LL(0xe7,0xe7,0xbb,0xe7,0x6b,0x5c,0xd3,0x8c),
	LL(0x6e,0x6e,0xa5,0x6e,0x57,0xcb,0xdc,0x39),
	LL(0xc4,0xc4,0x37,0xc4,0x6e,0xf3,0x95,0xaa),
	LL(0x03,0x03,0x0c,0x03,0x18,0x0f,0x06,0x1b),
	LL(0x56,0x56,0x45,0x56,0x8a,0x13,0xac,0xdc),
	LL(0x44,0x44,0x0d,0x44,0x1a,0x49,0x88,0x5e),
	LL(0x7f,0x7f,0xe1,0x7f,0xdf,0x9e,0xfe,0xa0),
	LL(0xa9,0xa9,0x9e,0xa9,0x21,0x37,0x4f,0x88),
	LL(0x2a,0x2a,0xa8,0x2a,0x4d,0x82,0x54,0x67),
	LL(0xbb,0xbb,0xd6,0xbb,0xb1,0x6d,0x6b,0x0a),
	LL(0xc1,0xc1,0x23,0xc1,0x46,0xe2,0x9f,0x87),
	LL(0x53,0x53,0x51,0x53,0xa2,0x02,0xa6,0xf1),
	LL(0xdc,0xdc,0x57,0xdc,0xae,0x8b,0xa5,0x72),
	LL(0x0b,0x0b,0x2c,0x0b,0x58,0x27,0x16,0x53),
	LL(0x9d,0x9d,0x4e,0x9d,0x9c,0xd3,0x27,0x01),
	LL(0x6c,0x6c,0xad,0x6c,0x47,0xc1,0xd8,0x2b),
	LL(0x31,0x31,0xc4,0x31,0x95,0xf5,0x62,0xa4),
	LL(0x74,0x74,0xcd,0x74,0x87,0xb9,0xe8,0xf3),
	LL(0xf6,0xf6,0xff,0xf6,0xe3,0x09,0xf1,0x15),
	LL(0x46,0x46,0x05,0x46,0x0a,0x43,0x8c,0x4c),
	LL(0xac,0xac,0x8a,0xac,0x09,0x26,0x45,0xa5),
	LL(0x89,0x89,0x1e,0x89,0x3c,0x97,0x0f,0xb5),
	LL(0x14,0x14,0x50,0x14,0xa0,0x44,0x28,0xb4),
	LL(0xe1,0xe1,0xa3,0xe1,0x5b,0x42,0xdf,0xba),
	LL(0x16,0x16,0x58,0x16,0xb0,0x4e,0x2c,0xa6),
	LL(0x3a,0x3a,0xe8,0x3a,0xcd,0xd2,0x74,0xf7),
	LL(0x69,0x69,0xb9,0x69,0x6f,0xd0,0xd2,0x06),
	LL(0x09,0x09,0x24,0x09,0x48,0x2d,0x12,0x41),
	LL(0x70,0x70,0xdd,0x70,0xa7,0xad,0xe0,0xd7),
	LL(0xb6,0xb6,0xe2,0xb6,0xd9,0x54,0x71,0x6f),
	LL(0xd0,0xd0,0x67,0xd0,0xce,0xb7,0xbd,0x1e),
	LL(0xed,0xed,0x93,0xed,0x3b,0x7e,0xc7,0xd6),
	LL(0xcc,0xcc,0x17,0xcc,0x2e,0xdb,0x85,0xe2),
	LL(0x42,0x42,0x15,0x42,0x2a,0x57,0x84,0x68),
	LL(0x98,0x98,0x5a,0x98,0xb4,0xc2,0x2d,0x2c),
	LL(0xa4,0xa4,0xaa,0xa4,0x49,0x0e,0x55,0xed),
	LL(0x28,0x28,0xa0,0x28,0x5d,0x88,0x50,0x75),
	LL(0x5c,0x5c,0x6d,0x5c,0xda,0x31,0xb8,0x86),
	LL(0xf8,0xf8,0xc7,0xf8,0x93,0x3f,0xed,0x6b),
	LL(0x86,0x86,0x22,0x86,0x44,0xa4,0x11,0xc2),
#define RC	(&(Cx.q[256*N]))
	0x18,0x23,0xc6,0xe8,0x87,0xb8,0x01,0x4f,	/* rc[ROUNDS] */
	0x36,0xa6,0xd2,0xf5,0x79,0x6f,0x91,0x52,
	0x60,0xbc,0x9b,0x8e,0xa3,0x0c,0x7b,0x35,
	0x1d,0xe0,0xd7,0xc2,0x2e,0x4b,0xfe,0x57,
	0x15,0x77,0x37,0xe5,0x9f,0xf0,0x4a,0xda,
	0x58,0xc9,0x29,0x0a,0xb1,0xa0,0x6b,0x85,
	0xbd,0x5d,0x10,0xf4,0xcb,0x3e,0x05,0x67,
	0xe4,0x27,0x41,0x8b,0xa7,0x7d,0x95,0xd8,
	0xfb,0xee,0x7c,0x66,0xdd,0x17,0x47,0x9e,
	0xca,0x2d,0xbf,0x07,0xad,0x5a,0x83,0x33
	}
};
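
/*
 * The round constants are not independent data: the eight bytes of
 * rc[r] are the S-box outputs for inputs 8*(r-1) through 8*(r-1)+7,
 * i.e. the leading bytes of eight consecutive LL() rows above, as the
 * Whirlpool specification prescribes. Only the first 64-bit row of
 * the key state is XORed with RC[r]; the constant for the remaining
 * rows is zero.
 */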

void whirlpool_block(WHIRLPOOL_CTX *ctx,const void *inp,size_t n)
	{
	int	r;
	const u8 *p=inp;
	union	{ u64 q[8]; u8 c[64]; } S,K,*H=(void *)ctx->H.q;

#ifdef GO_FOR_MMX
	GO_FOR_MMX(ctx,inp,n);
#endif
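	/*
	 * Process the input as a sequence of 64-byte (512-bit) blocks;
	 * note that n counts blocks, not bytes.
	 */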
							do {
#ifdef OPENSSL_SMALL_FOOTPRINT
	u64	L[8];
	int	i;

	for (i=0;i<64;i++)	S.c[i] = (K.c[i] = H->c[i]) ^ p[i];
	for (r=0;r<ROUNDS;r++)
		{
		for (i=0;i<8;i++)
			{
			L[i]  = i ? 0 : RC[r];
			L[i] ^=	C0(K,i)       ^ C1(K,(i-1)&7) ^
				C2(K,(i-2)&7) ^ C3(K,(i-3)&7) ^
				C4(K,(i-4)&7) ^ C5(K,(i-5)&7) ^
				C6(K,(i-6)&7) ^ C7(K,(i-7)&7);
			}
		memcpy (K.q,L,64);
		for (i=0;i<8;i++)
			{
			L[i] ^= C0(S,i)       ^ C1(S,(i-1)&7) ^
				C2(S,(i-2)&7) ^ C3(S,(i-3)&7) ^
				C4(S,(i-4)&7) ^ C5(S,(i-5)&7) ^
				C6(S,(i-6)&7) ^ C7(S,(i-7)&7);
			}
		memcpy (S.q,L,64);
		}
	for (i=0;i<64;i++)	H->c[i] ^= S.c[i] ^ p[i];
#else
	u64	L0,L1,L2,L3,L4,L5,L6,L7;

#ifdef STRICT_ALIGNMENT
	if ((size_t)p & 7)
		{
		memcpy (S.c,p,64);
		S.q[0] ^= (K.q[0] = H->q[0]);
		S.q[1] ^= (K.q[1] = H->q[1]);
		S.q[2] ^= (K.q[2] = H->q[2]);
		S.q[3] ^= (K.q[3] = H->q[3]);
		S.q[4] ^= (K.q[4] = H->q[4]);
		S.q[5] ^= (K.q[5] = H->q[5]);
		S.q[6] ^= (K.q[6] = H->q[6]);
		S.q[7] ^= (K.q[7] = H->q[7]);
		}
	else
#endif
		{
		const u64 *pa = (const u64*)p;
		S.q[0] = (K.q[0] = H->q[0]) ^ pa[0];
		S.q[1] = (K.q[1] = H->q[1]) ^ pa[1];
		S.q[2] = (K.q[2] = H->q[2]) ^ pa[2];
		S.q[3] = (K.q[3] = H->q[3]) ^ pa[3];
		S.q[4] = (K.q[4] = H->q[4]) ^ pa[4];
		S.q[5] = (K.q[5] = H->q[5]) ^ pa[5];
		S.q[6] = (K.q[6] = H->q[6]) ^ pa[6];
		S.q[7] = (K.q[7] = H->q[7]) ^ pa[7];
		}

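	/*
	 * Each round first pushes the key state K through the round
	 * function (with round constant RC[r]) to derive the next
	 * round key, then pushes the cipher state S through the same
	 * round function keyed with K. The Miyaguchi-Preneel
	 * feed-forward after the loop folds S and the message block
	 * back into the chaining value H.
	 */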
	for(r=0;r<ROUNDS;r++)
		{
#ifdef SMALL_REGISTER_BANK
		L0 =	C0(K,0) ^ C1(K,7) ^ C2(K,6) ^ C3(K,5) ^
			C4(K,4) ^ C5(K,3) ^ C6(K,2) ^ C7(K,1) ^ RC[r];
		L1 =	C0(K,1) ^ C1(K,0) ^ C2(K,7) ^ C3(K,6) ^
			C4(K,5) ^ C5(K,4) ^ C6(K,3) ^ C7(K,2);
		L2 =	C0(K,2) ^ C1(K,1) ^ C2(K,0) ^ C3(K,7) ^
			C4(K,6) ^ C5(K,5) ^ C6(K,4) ^ C7(K,3);
		L3 =	C0(K,3) ^ C1(K,2) ^ C2(K,1) ^ C3(K,0) ^
			C4(K,7) ^ C5(K,6) ^ C6(K,5) ^ C7(K,4);
		L4 =	C0(K,4) ^ C1(K,3) ^ C2(K,2) ^ C3(K,1) ^
			C4(K,0) ^ C5(K,7) ^ C6(K,6) ^ C7(K,5);
		L5 =	C0(K,5) ^ C1(K,4) ^ C2(K,3) ^ C3(K,2) ^
			C4(K,1) ^ C5(K,0) ^ C6(K,7) ^ C7(K,6);
		L6 =	C0(K,6) ^ C1(K,5) ^ C2(K,4) ^ C3(K,3) ^
			C4(K,2) ^ C5(K,1) ^ C6(K,0) ^ C7(K,7);
		L7 =	C0(K,7) ^ C1(K,6) ^ C2(K,5) ^ C3(K,4) ^
			C4(K,3) ^ C5(K,2) ^ C6(K,1) ^ C7(K,0);

		K.q[0] = L0; K.q[1] = L1; K.q[2] = L2; K.q[3] = L3;
		K.q[4] = L4; K.q[5] = L5; K.q[6] = L6; K.q[7] = L7;

		L0 ^=	C0(S,0) ^ C1(S,7) ^ C2(S,6) ^ C3(S,5) ^
			C4(S,4) ^ C5(S,3) ^ C6(S,2) ^ C7(S,1);
		L1 ^=	C0(S,1) ^ C1(S,0) ^ C2(S,7) ^ C3(S,6) ^
			C4(S,5) ^ C5(S,4) ^ C6(S,3) ^ C7(S,2);
		L2 ^=	C0(S,2) ^ C1(S,1) ^ C2(S,0) ^ C3(S,7) ^
			C4(S,6) ^ C5(S,5) ^ C6(S,4) ^ C7(S,3);
		L3 ^=	C0(S,3) ^ C1(S,2) ^ C2(S,1) ^ C3(S,0) ^
			C4(S,7) ^ C5(S,6) ^ C6(S,5) ^ C7(S,4);
		L4 ^=	C0(S,4) ^ C1(S,3) ^ C2(S,2) ^ C3(S,1) ^
			C4(S,0) ^ C5(S,7) ^ C6(S,6) ^ C7(S,5);
		L5 ^=	C0(S,5) ^ C1(S,4) ^ C2(S,3) ^ C3(S,2) ^
			C4(S,1) ^ C5(S,0) ^ C6(S,7) ^ C7(S,6);
		L6 ^=	C0(S,6) ^ C1(S,5) ^ C2(S,4) ^ C3(S,3) ^
			C4(S,2) ^ C5(S,1) ^ C6(S,0) ^ C7(S,7);
		L7 ^=	C0(S,7) ^ C1(S,6) ^ C2(S,5) ^ C3(S,4) ^
			C4(S,3) ^ C5(S,2) ^ C6(S,1) ^ C7(S,0);

		S.q[0] = L0; S.q[1] = L1; S.q[2] = L2; S.q[3] = L3;
		S.q[4] = L4; S.q[5] = L5; S.q[6] = L6; S.q[7] = L7;
#else
		L0  = C0(K,0); L1  = C1(K,0); L2  = C2(K,0); L3  = C3(K,0);
		L4  = C4(K,0); L5  = C5(K,0); L6  = C6(K,0); L7  = C7(K,0);
		L0 ^= RC[r];

		L1 ^= C0(K,1); L2 ^= C1(K,1); L3 ^= C2(K,1); L4 ^= C3(K,1);
		L5 ^= C4(K,1); L6 ^= C5(K,1); L7 ^= C6(K,1); L0 ^= C7(K,1);

		L2 ^= C0(K,2); L3 ^= C1(K,2); L4 ^= C2(K,2); L5 ^= C3(K,2);
		L6 ^= C4(K,2); L7 ^= C5(K,2); L0 ^= C6(K,2); L1 ^= C7(K,2);

		L3 ^= C0(K,3); L4 ^= C1(K,3); L5 ^= C2(K,3); L6 ^= C3(K,3);
		L7 ^= C4(K,3); L0 ^= C5(K,3); L1 ^= C6(K,3); L2 ^= C7(K,3);

		L4 ^= C0(K,4); L5 ^= C1(K,4); L6 ^= C2(K,4); L7 ^= C3(K,4);
		L0 ^= C4(K,4); L1 ^= C5(K,4); L2 ^= C6(K,4); L3 ^= C7(K,4);

		L5 ^= C0(K,5); L6 ^= C1(K,5); L7 ^= C2(K,5); L0 ^= C3(K,5);
		L1 ^= C4(K,5); L2 ^= C5(K,5); L3 ^= C6(K,5); L4 ^= C7(K,5);

		L6 ^= C0(K,6); L7 ^= C1(K,6); L0 ^= C2(K,6); L1 ^= C3(K,6);
		L2 ^= C4(K,6); L3 ^= C5(K,6); L4 ^= C6(K,6); L5 ^= C7(K,6);

		L7 ^= C0(K,7); L0 ^= C1(K,7); L1 ^= C2(K,7); L2 ^= C3(K,7);
		L3 ^= C4(K,7); L4 ^= C5(K,7); L5 ^= C6(K,7); L6 ^= C7(K,7);

		K.q[0] = L0; K.q[1] = L1; K.q[2] = L2; K.q[3] = L3;
		K.q[4] = L4; K.q[5] = L5; K.q[6] = L6; K.q[7] = L7;

		L0 ^= C0(S,0); L1 ^= C1(S,0); L2 ^= C2(S,0); L3 ^= C3(S,0);
		L4 ^= C4(S,0); L5 ^= C5(S,0); L6 ^= C6(S,0); L7 ^= C7(S,0);

		L1 ^= C0(S,1); L2 ^= C1(S,1); L3 ^= C2(S,1); L4 ^= C3(S,1);
		L5 ^= C4(S,1); L6 ^= C5(S,1); L7 ^= C6(S,1); L0 ^= C7(S,1);

		L2 ^= C0(S,2); L3 ^= C1(S,2); L4 ^= C2(S,2); L5 ^= C3(S,2);
		L6 ^= C4(S,2); L7 ^= C5(S,2); L0 ^= C6(S,2); L1 ^= C7(S,2);

		L3 ^= C0(S,3); L4 ^= C1(S,3); L5 ^= C2(S,3); L6 ^= C3(S,3);
		L7 ^= C4(S,3); L0 ^= C5(S,3); L1 ^= C6(S,3); L2 ^= C7(S,3);

		L4 ^= C0(S,4); L5 ^= C1(S,4); L6 ^= C2(S,4); L7 ^= C3(S,4);
		L0 ^= C4(S,4); L1 ^= C5(S,4); L2 ^= C6(S,4); L3 ^= C7(S,4);

		L5 ^= C0(S,5); L6 ^= C1(S,5); L7 ^= C2(S,5); L0 ^= C3(S,5);
		L1 ^= C4(S,5); L2 ^= C5(S,5); L3 ^= C6(S,5); L4 ^= C7(S,5);

		L6 ^= C0(S,6); L7 ^= C1(S,6); L0 ^= C2(S,6); L1 ^= C3(S,6);
		L2 ^= C4(S,6); L3 ^= C5(S,6); L4 ^= C6(S,6); L5 ^= C7(S,6);

		L7 ^= C0(S,7); L0 ^= C1(S,7); L1 ^= C2(S,7); L2 ^= C3(S,7);
		L3 ^= C4(S,7); L4 ^= C5(S,7); L5 ^= C6(S,7); L6 ^= C7(S,7);

		S.q[0] = L0; S.q[1] = L1; S.q[2] = L2; S.q[3] = L3;
		S.q[4] = L4; S.q[5] = L5; S.q[6] = L6; S.q[7] = L7;
#endif
		}

#ifdef STRICT_ALIGNMENT
	if ((size_t)p & 7)
		{
		int i;
		for(i=0;i<64;i++)	H->c[i] ^= S.c[i] ^ p[i];
		}
	else
#endif
		{
		const u64 *pa=(const u64 *)p;
		H->q[0] ^= S.q[0] ^ pa[0];
		H->q[1] ^= S.q[1] ^ pa[1];
		H->q[2] ^= S.q[2] ^ pa[2];
		H->q[3] ^= S.q[3] ^ pa[3];
		H->q[4] ^= S.q[4] ^ pa[4];
		H->q[5] ^= S.q[5] ^ pa[5];
		H->q[6] ^= S.q[6] ^ pa[6];
		H->q[7] ^= S.q[7] ^ pa[7];
		}
#endif
							p += 64;
							} while(--n);
	}