1/* $OpenBSD: whirlpool.c,v 1.3 2024/06/01 07:36:17 tb Exp $ */
2/**
3 * The Whirlpool hashing function.
4 *
5 * <P>
6 * <b>References</b>
7 *
8 * <P>
9 * The Whirlpool algorithm was developed by
10 * <a href="mailto:pbarreto@scopus.com.br">Paulo S. L. M. Barreto</a> and
11 * <a href="mailto:vincent.rijmen@cryptomathic.com">Vincent Rijmen</a>.
12 *
13 * See
14 *      P.S.L.M. Barreto, V. Rijmen,
15 *      ``The Whirlpool hashing function,''
16 *      NESSIE submission, 2000 (tweaked version, 2001),
17 *      <https://www.cosic.esat.kuleuven.ac.be/nessie/workshop/submissions/whirlpool.zip>
18 *
19 * Based on "@version 3.0 (2003.03.12)" by Paulo S.L.M. Barreto and
20 * Vincent Rijmen. Lookup "reference implementations" on
21 * <http://planeta.terra.com.br/informatica/paulobarreto/>
22 *
23 * =============================================================================
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
26 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
27 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
29 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
32 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
33 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
34 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
35 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 */
38
39/*
40 * OpenSSL-specific implementation notes.
41 *
42 * WHIRLPOOL_Update as well as one-stroke WHIRLPOOL both expect
43 * number of *bytes* as input length argument. Bit-oriented routine
44 * as specified by authors is called WHIRLPOOL_BitUpdate[!] and
45 * does not have one-stroke counterpart.
46 *
47 * WHIRLPOOL_BitUpdate implements byte-oriented loop, essentially
48 * to serve WHIRLPOOL_Update. This is done for performance.
49 *
50 * Unlike authors' reference implementation, block processing
51 * routine whirlpool_block is designed to operate on multi-block
52 * input. This is done for performance.
53 */
54
55#include <endian.h>
56#include <string.h>
57
58#include <openssl/crypto.h>
59#include <openssl/whrlpool.h>
60
/* Local shorthand element types: bytes and 64-bit quadwords. */
typedef unsigned char		u8;
#if defined(_LP64)
typedef unsigned long		u64;
#else
typedef unsigned long long	u64;
#endif

/* Whirlpool performs a fixed number of 10 rounds. */
#define ROUNDS	10

/*
 * SMALL_REGISTER_BANK selects the round-function variant that keeps
 * intermediate values in memory (K.q/S.q) instead of eight 64-bit
 * locals, for CPUs with few general-purpose registers (i386, ARM).
 */
#undef SMALL_REGISTER_BANK
#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
#  define SMALL_REGISTER_BANK
#  if defined(WHIRLPOOL_ASM)
#    ifndef OPENSSL_SMALL_FOOTPRINT
#      define OPENSSL_SMALL_FOOTPRINT	/* it appears that for elder non-MMX
					   CPUs this is actually faster! */
#    endif
#include "x86_arch.h"
/*
 * GO_FOR_MMX dispatches to the MMX assembly block routine and returns
 * from the caller when MMX is available; otherwise it falls through
 * (via break) to the C implementation.
 */
#    define GO_FOR_MMX(ctx,inp,num)				\
do {								\
	void whirlpool_block_mmx(void *,const void *,size_t);	\
	if ((OPENSSL_cpu_caps() & CPUCAP_MASK_MMX) == 0)	\
		break;						\
        whirlpool_block_mmx(ctx->H.c,inp,num);			\
	return;							\
} while (0)
#  endif
#elif defined(__arm__)
#  define SMALL_REGISTER_BANK
#endif

/* 64-bit rotate; uses a single rolq instruction on x86_64 + GCC. */
#undef ROTATE
#if defined(__GNUC__) && __GNUC__>=2
#  if defined(__x86_64) || defined(__x86_64__)
#      define ROTATE(a,n)	({ u64 ret; asm ("rolq %1,%0"	\
				   : "=r"(ret) : "J"(n),"0"(a) : "cc"); ret; })
#  endif
#endif

#if defined(OPENSSL_SMALL_FOOTPRINT)
#  if !defined(ROTATE)
#    if BYTE_ORDER == LITTLE_ENDIAN	/* little-endians have to rotate left */
#      define ROTATE(i,n)	((i)<<(n) ^ (i)>>(64-n))
#    else				/* big-endians have to rotate right */
#      define ROTATE(i,n)	((i)>>(n) ^ (i)<<(64-n))
#    endif
#  endif
#  if defined(ROTATE) && !defined(__STRICT_ALIGNMENT)
#    define __STRICT_ALIGNMENT	/* ensure smallest table size */
#  endif
#endif

/*
 * Table size depends on __STRICT_ALIGNMENT and whether or not endian-
 * specific ROTATE macro is defined. If __STRICT_ALIGNMENT is not
 * defined, which is normally the case on x86[_64] CPUs, the table is
 * 4KB large unconditionally. Otherwise if ROTATE is defined, the
 * table is 2KB large, and otherwise - 16KB. 2KB table requires a
 * whole bunch of additional rotations, but I'm willing to "trade,"
 * because 16KB table certainly trashes L1 cache. I wish all CPUs
 * could handle unaligned load as 4KB table doesn't trash the cache,
 * nor does it require additional rotations.
 */
/*
 * Note that every Cn macro expands as two loads: one byte load and
 * one quadword load. One can argue that that many single-byte loads
 * is too excessive, as one could load a quadword and "milk" it for
 * eight 8-bit values instead. Well, yes, but in order to do so *and*
 * avoid excessive loads you have to accommodate a handful of 64-bit
 * values in the register bank and issue a bunch of shifts and mask.
 * It's a tradeoff: loads vs. shift and mask in big register bank[!].
 * On most CPUs eight single-byte loads are faster and I let other
 * ones to depend on smart compiler to fold byte loads if beneficial.
 * Hand-coded assembler would be another alternative:-)
 */
/*
 * N is the replication factor of each 8-byte table row (1, 2 or 8),
 * LL expands one row into its N stored variants, and C0..C7 fetch the
 * row selected by byte (i) of state K, pre-rotated by 0..56 bits —
 * either via ROTATE (N==1), a pre-rotated copy (N==8), or an
 * unaligned quadword load into the doubled table (N==2).
 */
#ifdef __STRICT_ALIGNMENT
#  if defined(ROTATE)
#    define N	1
#    define LL(c0,c1,c2,c3,c4,c5,c6,c7)	c0,c1,c2,c3,c4,c5,c6,c7
#    define C0(K,i)	(Cx.q[K.c[(i)*8+0]])
#    define C1(K,i)	ROTATE(Cx.q[K.c[(i)*8+1]],8)
#    define C2(K,i)	ROTATE(Cx.q[K.c[(i)*8+2]],16)
#    define C3(K,i)	ROTATE(Cx.q[K.c[(i)*8+3]],24)
#    define C4(K,i)	ROTATE(Cx.q[K.c[(i)*8+4]],32)
#    define C5(K,i)	ROTATE(Cx.q[K.c[(i)*8+5]],40)
#    define C6(K,i)	ROTATE(Cx.q[K.c[(i)*8+6]],48)
#    define C7(K,i)	ROTATE(Cx.q[K.c[(i)*8+7]],56)
#  else
#    define N	8
#    define LL(c0,c1,c2,c3,c4,c5,c6,c7)	c0,c1,c2,c3,c4,c5,c6,c7, \
					c7,c0,c1,c2,c3,c4,c5,c6, \
					c6,c7,c0,c1,c2,c3,c4,c5, \
					c5,c6,c7,c0,c1,c2,c3,c4, \
					c4,c5,c6,c7,c0,c1,c2,c3, \
					c3,c4,c5,c6,c7,c0,c1,c2, \
					c2,c3,c4,c5,c6,c7,c0,c1, \
					c1,c2,c3,c4,c5,c6,c7,c0
#    define C0(K,i)	(Cx.q[0+8*K.c[(i)*8+0]])
#    define C1(K,i)	(Cx.q[1+8*K.c[(i)*8+1]])
#    define C2(K,i)	(Cx.q[2+8*K.c[(i)*8+2]])
#    define C3(K,i)	(Cx.q[3+8*K.c[(i)*8+3]])
#    define C4(K,i)	(Cx.q[4+8*K.c[(i)*8+4]])
#    define C5(K,i)	(Cx.q[5+8*K.c[(i)*8+5]])
#    define C6(K,i)	(Cx.q[6+8*K.c[(i)*8+6]])
#    define C7(K,i)	(Cx.q[7+8*K.c[(i)*8+7]])
#  endif
#else
#  define N	2
#  define LL(c0,c1,c2,c3,c4,c5,c6,c7)	c0,c1,c2,c3,c4,c5,c6,c7, \
					c0,c1,c2,c3,c4,c5,c6,c7
#  define C0(K,i)	(((u64*)(Cx.c+0))[2*K.c[(i)*8+0]])
#  define C1(K,i)	(((u64*)(Cx.c+7))[2*K.c[(i)*8+1]])
#  define C2(K,i)	(((u64*)(Cx.c+6))[2*K.c[(i)*8+2]])
#  define C3(K,i)	(((u64*)(Cx.c+5))[2*K.c[(i)*8+3]])
#  define C4(K,i)	(((u64*)(Cx.c+4))[2*K.c[(i)*8+4]])
#  define C5(K,i)	(((u64*)(Cx.c+3))[2*K.c[(i)*8+5]])
#  define C6(K,i)	(((u64*)(Cx.c+2))[2*K.c[(i)*8+6]])
#  define C7(K,i)	(((u64*)(Cx.c+1))[2*K.c[(i)*8+7]])
#endif
180
/*
 * Cx — the combined lookup table.  The first 256*N quadwords hold one
 * precomputed 8-byte row per input byte value (each row expanded N
 * times by the LL macro above and fetched through C0..C7); the final
 * ROUNDS quadwords, reached via the RC macro, are the per-round
 * constants.  Data must not be altered: the whole hash depends on it.
 */
static const
union	{
	u8	c[(256*N+ROUNDS)*sizeof(u64)];
	u64	q[(256*N+ROUNDS)];
	} Cx = { {
	/* Note endian-neutral representation:-) */
	LL(0x18,0x18,0x60,0x18,0xc0,0x78,0x30,0xd8),
	LL(0x23,0x23,0x8c,0x23,0x05,0xaf,0x46,0x26),
	LL(0xc6,0xc6,0x3f,0xc6,0x7e,0xf9,0x91,0xb8),
	LL(0xe8,0xe8,0x87,0xe8,0x13,0x6f,0xcd,0xfb),
	LL(0x87,0x87,0x26,0x87,0x4c,0xa1,0x13,0xcb),
	LL(0xb8,0xb8,0xda,0xb8,0xa9,0x62,0x6d,0x11),
	LL(0x01,0x01,0x04,0x01,0x08,0x05,0x02,0x09),
	LL(0x4f,0x4f,0x21,0x4f,0x42,0x6e,0x9e,0x0d),
	LL(0x36,0x36,0xd8,0x36,0xad,0xee,0x6c,0x9b),
	LL(0xa6,0xa6,0xa2,0xa6,0x59,0x04,0x51,0xff),
	LL(0xd2,0xd2,0x6f,0xd2,0xde,0xbd,0xb9,0x0c),
	LL(0xf5,0xf5,0xf3,0xf5,0xfb,0x06,0xf7,0x0e),
	LL(0x79,0x79,0xf9,0x79,0xef,0x80,0xf2,0x96),
	LL(0x6f,0x6f,0xa1,0x6f,0x5f,0xce,0xde,0x30),
	LL(0x91,0x91,0x7e,0x91,0xfc,0xef,0x3f,0x6d),
	LL(0x52,0x52,0x55,0x52,0xaa,0x07,0xa4,0xf8),
	LL(0x60,0x60,0x9d,0x60,0x27,0xfd,0xc0,0x47),
	LL(0xbc,0xbc,0xca,0xbc,0x89,0x76,0x65,0x35),
	LL(0x9b,0x9b,0x56,0x9b,0xac,0xcd,0x2b,0x37),
	LL(0x8e,0x8e,0x02,0x8e,0x04,0x8c,0x01,0x8a),
	LL(0xa3,0xa3,0xb6,0xa3,0x71,0x15,0x5b,0xd2),
	LL(0x0c,0x0c,0x30,0x0c,0x60,0x3c,0x18,0x6c),
	LL(0x7b,0x7b,0xf1,0x7b,0xff,0x8a,0xf6,0x84),
	LL(0x35,0x35,0xd4,0x35,0xb5,0xe1,0x6a,0x80),
	LL(0x1d,0x1d,0x74,0x1d,0xe8,0x69,0x3a,0xf5),
	LL(0xe0,0xe0,0xa7,0xe0,0x53,0x47,0xdd,0xb3),
	LL(0xd7,0xd7,0x7b,0xd7,0xf6,0xac,0xb3,0x21),
	LL(0xc2,0xc2,0x2f,0xc2,0x5e,0xed,0x99,0x9c),
	LL(0x2e,0x2e,0xb8,0x2e,0x6d,0x96,0x5c,0x43),
	LL(0x4b,0x4b,0x31,0x4b,0x62,0x7a,0x96,0x29),
	LL(0xfe,0xfe,0xdf,0xfe,0xa3,0x21,0xe1,0x5d),
	LL(0x57,0x57,0x41,0x57,0x82,0x16,0xae,0xd5),
	LL(0x15,0x15,0x54,0x15,0xa8,0x41,0x2a,0xbd),
	LL(0x77,0x77,0xc1,0x77,0x9f,0xb6,0xee,0xe8),
	LL(0x37,0x37,0xdc,0x37,0xa5,0xeb,0x6e,0x92),
	LL(0xe5,0xe5,0xb3,0xe5,0x7b,0x56,0xd7,0x9e),
	LL(0x9f,0x9f,0x46,0x9f,0x8c,0xd9,0x23,0x13),
	LL(0xf0,0xf0,0xe7,0xf0,0xd3,0x17,0xfd,0x23),
	LL(0x4a,0x4a,0x35,0x4a,0x6a,0x7f,0x94,0x20),
	LL(0xda,0xda,0x4f,0xda,0x9e,0x95,0xa9,0x44),
	LL(0x58,0x58,0x7d,0x58,0xfa,0x25,0xb0,0xa2),
	LL(0xc9,0xc9,0x03,0xc9,0x06,0xca,0x8f,0xcf),
	LL(0x29,0x29,0xa4,0x29,0x55,0x8d,0x52,0x7c),
	LL(0x0a,0x0a,0x28,0x0a,0x50,0x22,0x14,0x5a),
	LL(0xb1,0xb1,0xfe,0xb1,0xe1,0x4f,0x7f,0x50),
	LL(0xa0,0xa0,0xba,0xa0,0x69,0x1a,0x5d,0xc9),
	LL(0x6b,0x6b,0xb1,0x6b,0x7f,0xda,0xd6,0x14),
	LL(0x85,0x85,0x2e,0x85,0x5c,0xab,0x17,0xd9),
	LL(0xbd,0xbd,0xce,0xbd,0x81,0x73,0x67,0x3c),
	LL(0x5d,0x5d,0x69,0x5d,0xd2,0x34,0xba,0x8f),
	LL(0x10,0x10,0x40,0x10,0x80,0x50,0x20,0x90),
	LL(0xf4,0xf4,0xf7,0xf4,0xf3,0x03,0xf5,0x07),
	LL(0xcb,0xcb,0x0b,0xcb,0x16,0xc0,0x8b,0xdd),
	LL(0x3e,0x3e,0xf8,0x3e,0xed,0xc6,0x7c,0xd3),
	LL(0x05,0x05,0x14,0x05,0x28,0x11,0x0a,0x2d),
	LL(0x67,0x67,0x81,0x67,0x1f,0xe6,0xce,0x78),
	LL(0xe4,0xe4,0xb7,0xe4,0x73,0x53,0xd5,0x97),
	LL(0x27,0x27,0x9c,0x27,0x25,0xbb,0x4e,0x02),
	LL(0x41,0x41,0x19,0x41,0x32,0x58,0x82,0x73),
	LL(0x8b,0x8b,0x16,0x8b,0x2c,0x9d,0x0b,0xa7),
	LL(0xa7,0xa7,0xa6,0xa7,0x51,0x01,0x53,0xf6),
	LL(0x7d,0x7d,0xe9,0x7d,0xcf,0x94,0xfa,0xb2),
	LL(0x95,0x95,0x6e,0x95,0xdc,0xfb,0x37,0x49),
	LL(0xd8,0xd8,0x47,0xd8,0x8e,0x9f,0xad,0x56),
	LL(0xfb,0xfb,0xcb,0xfb,0x8b,0x30,0xeb,0x70),
	LL(0xee,0xee,0x9f,0xee,0x23,0x71,0xc1,0xcd),
	LL(0x7c,0x7c,0xed,0x7c,0xc7,0x91,0xf8,0xbb),
	LL(0x66,0x66,0x85,0x66,0x17,0xe3,0xcc,0x71),
	LL(0xdd,0xdd,0x53,0xdd,0xa6,0x8e,0xa7,0x7b),
	LL(0x17,0x17,0x5c,0x17,0xb8,0x4b,0x2e,0xaf),
	LL(0x47,0x47,0x01,0x47,0x02,0x46,0x8e,0x45),
	LL(0x9e,0x9e,0x42,0x9e,0x84,0xdc,0x21,0x1a),
	LL(0xca,0xca,0x0f,0xca,0x1e,0xc5,0x89,0xd4),
	LL(0x2d,0x2d,0xb4,0x2d,0x75,0x99,0x5a,0x58),
	LL(0xbf,0xbf,0xc6,0xbf,0x91,0x79,0x63,0x2e),
	LL(0x07,0x07,0x1c,0x07,0x38,0x1b,0x0e,0x3f),
	LL(0xad,0xad,0x8e,0xad,0x01,0x23,0x47,0xac),
	LL(0x5a,0x5a,0x75,0x5a,0xea,0x2f,0xb4,0xb0),
	LL(0x83,0x83,0x36,0x83,0x6c,0xb5,0x1b,0xef),
	LL(0x33,0x33,0xcc,0x33,0x85,0xff,0x66,0xb6),
	LL(0x63,0x63,0x91,0x63,0x3f,0xf2,0xc6,0x5c),
	LL(0x02,0x02,0x08,0x02,0x10,0x0a,0x04,0x12),
	LL(0xaa,0xaa,0x92,0xaa,0x39,0x38,0x49,0x93),
	LL(0x71,0x71,0xd9,0x71,0xaf,0xa8,0xe2,0xde),
	LL(0xc8,0xc8,0x07,0xc8,0x0e,0xcf,0x8d,0xc6),
	LL(0x19,0x19,0x64,0x19,0xc8,0x7d,0x32,0xd1),
	LL(0x49,0x49,0x39,0x49,0x72,0x70,0x92,0x3b),
	LL(0xd9,0xd9,0x43,0xd9,0x86,0x9a,0xaf,0x5f),
	LL(0xf2,0xf2,0xef,0xf2,0xc3,0x1d,0xf9,0x31),
	LL(0xe3,0xe3,0xab,0xe3,0x4b,0x48,0xdb,0xa8),
	LL(0x5b,0x5b,0x71,0x5b,0xe2,0x2a,0xb6,0xb9),
	LL(0x88,0x88,0x1a,0x88,0x34,0x92,0x0d,0xbc),
	LL(0x9a,0x9a,0x52,0x9a,0xa4,0xc8,0x29,0x3e),
	LL(0x26,0x26,0x98,0x26,0x2d,0xbe,0x4c,0x0b),
	LL(0x32,0x32,0xc8,0x32,0x8d,0xfa,0x64,0xbf),
	LL(0xb0,0xb0,0xfa,0xb0,0xe9,0x4a,0x7d,0x59),
	LL(0xe9,0xe9,0x83,0xe9,0x1b,0x6a,0xcf,0xf2),
	LL(0x0f,0x0f,0x3c,0x0f,0x78,0x33,0x1e,0x77),
	LL(0xd5,0xd5,0x73,0xd5,0xe6,0xa6,0xb7,0x33),
	LL(0x80,0x80,0x3a,0x80,0x74,0xba,0x1d,0xf4),
	LL(0xbe,0xbe,0xc2,0xbe,0x99,0x7c,0x61,0x27),
	LL(0xcd,0xcd,0x13,0xcd,0x26,0xde,0x87,0xeb),
	LL(0x34,0x34,0xd0,0x34,0xbd,0xe4,0x68,0x89),
	LL(0x48,0x48,0x3d,0x48,0x7a,0x75,0x90,0x32),
	LL(0xff,0xff,0xdb,0xff,0xab,0x24,0xe3,0x54),
	LL(0x7a,0x7a,0xf5,0x7a,0xf7,0x8f,0xf4,0x8d),
	LL(0x90,0x90,0x7a,0x90,0xf4,0xea,0x3d,0x64),
	LL(0x5f,0x5f,0x61,0x5f,0xc2,0x3e,0xbe,0x9d),
	LL(0x20,0x20,0x80,0x20,0x1d,0xa0,0x40,0x3d),
	LL(0x68,0x68,0xbd,0x68,0x67,0xd5,0xd0,0x0f),
	LL(0x1a,0x1a,0x68,0x1a,0xd0,0x72,0x34,0xca),
	LL(0xae,0xae,0x82,0xae,0x19,0x2c,0x41,0xb7),
	LL(0xb4,0xb4,0xea,0xb4,0xc9,0x5e,0x75,0x7d),
	LL(0x54,0x54,0x4d,0x54,0x9a,0x19,0xa8,0xce),
	LL(0x93,0x93,0x76,0x93,0xec,0xe5,0x3b,0x7f),
	LL(0x22,0x22,0x88,0x22,0x0d,0xaa,0x44,0x2f),
	LL(0x64,0x64,0x8d,0x64,0x07,0xe9,0xc8,0x63),
	LL(0xf1,0xf1,0xe3,0xf1,0xdb,0x12,0xff,0x2a),
	LL(0x73,0x73,0xd1,0x73,0xbf,0xa2,0xe6,0xcc),
	LL(0x12,0x12,0x48,0x12,0x90,0x5a,0x24,0x82),
	LL(0x40,0x40,0x1d,0x40,0x3a,0x5d,0x80,0x7a),
	LL(0x08,0x08,0x20,0x08,0x40,0x28,0x10,0x48),
	LL(0xc3,0xc3,0x2b,0xc3,0x56,0xe8,0x9b,0x95),
	LL(0xec,0xec,0x97,0xec,0x33,0x7b,0xc5,0xdf),
	LL(0xdb,0xdb,0x4b,0xdb,0x96,0x90,0xab,0x4d),
	LL(0xa1,0xa1,0xbe,0xa1,0x61,0x1f,0x5f,0xc0),
	LL(0x8d,0x8d,0x0e,0x8d,0x1c,0x83,0x07,0x91),
	LL(0x3d,0x3d,0xf4,0x3d,0xf5,0xc9,0x7a,0xc8),
	LL(0x97,0x97,0x66,0x97,0xcc,0xf1,0x33,0x5b),
	LL(0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00),
	LL(0xcf,0xcf,0x1b,0xcf,0x36,0xd4,0x83,0xf9),
	LL(0x2b,0x2b,0xac,0x2b,0x45,0x87,0x56,0x6e),
	LL(0x76,0x76,0xc5,0x76,0x97,0xb3,0xec,0xe1),
	LL(0x82,0x82,0x32,0x82,0x64,0xb0,0x19,0xe6),
	LL(0xd6,0xd6,0x7f,0xd6,0xfe,0xa9,0xb1,0x28),
	LL(0x1b,0x1b,0x6c,0x1b,0xd8,0x77,0x36,0xc3),
	LL(0xb5,0xb5,0xee,0xb5,0xc1,0x5b,0x77,0x74),
	LL(0xaf,0xaf,0x86,0xaf,0x11,0x29,0x43,0xbe),
	LL(0x6a,0x6a,0xb5,0x6a,0x77,0xdf,0xd4,0x1d),
	LL(0x50,0x50,0x5d,0x50,0xba,0x0d,0xa0,0xea),
	LL(0x45,0x45,0x09,0x45,0x12,0x4c,0x8a,0x57),
	LL(0xf3,0xf3,0xeb,0xf3,0xcb,0x18,0xfb,0x38),
	LL(0x30,0x30,0xc0,0x30,0x9d,0xf0,0x60,0xad),
	LL(0xef,0xef,0x9b,0xef,0x2b,0x74,0xc3,0xc4),
	LL(0x3f,0x3f,0xfc,0x3f,0xe5,0xc3,0x7e,0xda),
	LL(0x55,0x55,0x49,0x55,0x92,0x1c,0xaa,0xc7),
	LL(0xa2,0xa2,0xb2,0xa2,0x79,0x10,0x59,0xdb),
	LL(0xea,0xea,0x8f,0xea,0x03,0x65,0xc9,0xe9),
	LL(0x65,0x65,0x89,0x65,0x0f,0xec,0xca,0x6a),
	LL(0xba,0xba,0xd2,0xba,0xb9,0x68,0x69,0x03),
	LL(0x2f,0x2f,0xbc,0x2f,0x65,0x93,0x5e,0x4a),
	LL(0xc0,0xc0,0x27,0xc0,0x4e,0xe7,0x9d,0x8e),
	LL(0xde,0xde,0x5f,0xde,0xbe,0x81,0xa1,0x60),
	LL(0x1c,0x1c,0x70,0x1c,0xe0,0x6c,0x38,0xfc),
	LL(0xfd,0xfd,0xd3,0xfd,0xbb,0x2e,0xe7,0x46),
	LL(0x4d,0x4d,0x29,0x4d,0x52,0x64,0x9a,0x1f),
	LL(0x92,0x92,0x72,0x92,0xe4,0xe0,0x39,0x76),
	LL(0x75,0x75,0xc9,0x75,0x8f,0xbc,0xea,0xfa),
	LL(0x06,0x06,0x18,0x06,0x30,0x1e,0x0c,0x36),
	LL(0x8a,0x8a,0x12,0x8a,0x24,0x98,0x09,0xae),
	LL(0xb2,0xb2,0xf2,0xb2,0xf9,0x40,0x79,0x4b),
	LL(0xe6,0xe6,0xbf,0xe6,0x63,0x59,0xd1,0x85),
	LL(0x0e,0x0e,0x38,0x0e,0x70,0x36,0x1c,0x7e),
	LL(0x1f,0x1f,0x7c,0x1f,0xf8,0x63,0x3e,0xe7),
	LL(0x62,0x62,0x95,0x62,0x37,0xf7,0xc4,0x55),
	LL(0xd4,0xd4,0x77,0xd4,0xee,0xa3,0xb5,0x3a),
	LL(0xa8,0xa8,0x9a,0xa8,0x29,0x32,0x4d,0x81),
	LL(0x96,0x96,0x62,0x96,0xc4,0xf4,0x31,0x52),
	LL(0xf9,0xf9,0xc3,0xf9,0x9b,0x3a,0xef,0x62),
	LL(0xc5,0xc5,0x33,0xc5,0x66,0xf6,0x97,0xa3),
	LL(0x25,0x25,0x94,0x25,0x35,0xb1,0x4a,0x10),
	LL(0x59,0x59,0x79,0x59,0xf2,0x20,0xb2,0xab),
	LL(0x84,0x84,0x2a,0x84,0x54,0xae,0x15,0xd0),
	LL(0x72,0x72,0xd5,0x72,0xb7,0xa7,0xe4,0xc5),
	LL(0x39,0x39,0xe4,0x39,0xd5,0xdd,0x72,0xec),
	LL(0x4c,0x4c,0x2d,0x4c,0x5a,0x61,0x98,0x16),
	LL(0x5e,0x5e,0x65,0x5e,0xca,0x3b,0xbc,0x94),
	LL(0x78,0x78,0xfd,0x78,0xe7,0x85,0xf0,0x9f),
	LL(0x38,0x38,0xe0,0x38,0xdd,0xd8,0x70,0xe5),
	LL(0x8c,0x8c,0x0a,0x8c,0x14,0x86,0x05,0x98),
	LL(0xd1,0xd1,0x63,0xd1,0xc6,0xb2,0xbf,0x17),
	LL(0xa5,0xa5,0xae,0xa5,0x41,0x0b,0x57,0xe4),
	LL(0xe2,0xe2,0xaf,0xe2,0x43,0x4d,0xd9,0xa1),
	LL(0x61,0x61,0x99,0x61,0x2f,0xf8,0xc2,0x4e),
	LL(0xb3,0xb3,0xf6,0xb3,0xf1,0x45,0x7b,0x42),
	LL(0x21,0x21,0x84,0x21,0x15,0xa5,0x42,0x34),
	LL(0x9c,0x9c,0x4a,0x9c,0x94,0xd6,0x25,0x08),
	LL(0x1e,0x1e,0x78,0x1e,0xf0,0x66,0x3c,0xee),
	LL(0x43,0x43,0x11,0x43,0x22,0x52,0x86,0x61),
	LL(0xc7,0xc7,0x3b,0xc7,0x76,0xfc,0x93,0xb1),
	LL(0xfc,0xfc,0xd7,0xfc,0xb3,0x2b,0xe5,0x4f),
	LL(0x04,0x04,0x10,0x04,0x20,0x14,0x08,0x24),
	LL(0x51,0x51,0x59,0x51,0xb2,0x08,0xa2,0xe3),
	LL(0x99,0x99,0x5e,0x99,0xbc,0xc7,0x2f,0x25),
	LL(0x6d,0x6d,0xa9,0x6d,0x4f,0xc4,0xda,0x22),
	LL(0x0d,0x0d,0x34,0x0d,0x68,0x39,0x1a,0x65),
	LL(0xfa,0xfa,0xcf,0xfa,0x83,0x35,0xe9,0x79),
	LL(0xdf,0xdf,0x5b,0xdf,0xb6,0x84,0xa3,0x69),
	LL(0x7e,0x7e,0xe5,0x7e,0xd7,0x9b,0xfc,0xa9),
	LL(0x24,0x24,0x90,0x24,0x3d,0xb4,0x48,0x19),
	LL(0x3b,0x3b,0xec,0x3b,0xc5,0xd7,0x76,0xfe),
	LL(0xab,0xab,0x96,0xab,0x31,0x3d,0x4b,0x9a),
	LL(0xce,0xce,0x1f,0xce,0x3e,0xd1,0x81,0xf0),
	LL(0x11,0x11,0x44,0x11,0x88,0x55,0x22,0x99),
	LL(0x8f,0x8f,0x06,0x8f,0x0c,0x89,0x03,0x83),
	LL(0x4e,0x4e,0x25,0x4e,0x4a,0x6b,0x9c,0x04),
	LL(0xb7,0xb7,0xe6,0xb7,0xd1,0x51,0x73,0x66),
	LL(0xeb,0xeb,0x8b,0xeb,0x0b,0x60,0xcb,0xe0),
	LL(0x3c,0x3c,0xf0,0x3c,0xfd,0xcc,0x78,0xc1),
	LL(0x81,0x81,0x3e,0x81,0x7c,0xbf,0x1f,0xfd),
	LL(0x94,0x94,0x6a,0x94,0xd4,0xfe,0x35,0x40),
	LL(0xf7,0xf7,0xfb,0xf7,0xeb,0x0c,0xf3,0x1c),
	LL(0xb9,0xb9,0xde,0xb9,0xa1,0x67,0x6f,0x18),
	LL(0x13,0x13,0x4c,0x13,0x98,0x5f,0x26,0x8b),
	LL(0x2c,0x2c,0xb0,0x2c,0x7d,0x9c,0x58,0x51),
	LL(0xd3,0xd3,0x6b,0xd3,0xd6,0xb8,0xbb,0x05),
	LL(0xe7,0xe7,0xbb,0xe7,0x6b,0x5c,0xd3,0x8c),
	LL(0x6e,0x6e,0xa5,0x6e,0x57,0xcb,0xdc,0x39),
	LL(0xc4,0xc4,0x37,0xc4,0x6e,0xf3,0x95,0xaa),
	LL(0x03,0x03,0x0c,0x03,0x18,0x0f,0x06,0x1b),
	LL(0x56,0x56,0x45,0x56,0x8a,0x13,0xac,0xdc),
	LL(0x44,0x44,0x0d,0x44,0x1a,0x49,0x88,0x5e),
	LL(0x7f,0x7f,0xe1,0x7f,0xdf,0x9e,0xfe,0xa0),
	LL(0xa9,0xa9,0x9e,0xa9,0x21,0x37,0x4f,0x88),
	LL(0x2a,0x2a,0xa8,0x2a,0x4d,0x82,0x54,0x67),
	LL(0xbb,0xbb,0xd6,0xbb,0xb1,0x6d,0x6b,0x0a),
	LL(0xc1,0xc1,0x23,0xc1,0x46,0xe2,0x9f,0x87),
	LL(0x53,0x53,0x51,0x53,0xa2,0x02,0xa6,0xf1),
	LL(0xdc,0xdc,0x57,0xdc,0xae,0x8b,0xa5,0x72),
	LL(0x0b,0x0b,0x2c,0x0b,0x58,0x27,0x16,0x53),
	LL(0x9d,0x9d,0x4e,0x9d,0x9c,0xd3,0x27,0x01),
	LL(0x6c,0x6c,0xad,0x6c,0x47,0xc1,0xd8,0x2b),
	LL(0x31,0x31,0xc4,0x31,0x95,0xf5,0x62,0xa4),
	LL(0x74,0x74,0xcd,0x74,0x87,0xb9,0xe8,0xf3),
	LL(0xf6,0xf6,0xff,0xf6,0xe3,0x09,0xf1,0x15),
	LL(0x46,0x46,0x05,0x46,0x0a,0x43,0x8c,0x4c),
	LL(0xac,0xac,0x8a,0xac,0x09,0x26,0x45,0xa5),
	LL(0x89,0x89,0x1e,0x89,0x3c,0x97,0x0f,0xb5),
	LL(0x14,0x14,0x50,0x14,0xa0,0x44,0x28,0xb4),
	LL(0xe1,0xe1,0xa3,0xe1,0x5b,0x42,0xdf,0xba),
	LL(0x16,0x16,0x58,0x16,0xb0,0x4e,0x2c,0xa6),
	LL(0x3a,0x3a,0xe8,0x3a,0xcd,0xd2,0x74,0xf7),
	LL(0x69,0x69,0xb9,0x69,0x6f,0xd0,0xd2,0x06),
	LL(0x09,0x09,0x24,0x09,0x48,0x2d,0x12,0x41),
	LL(0x70,0x70,0xdd,0x70,0xa7,0xad,0xe0,0xd7),
	LL(0xb6,0xb6,0xe2,0xb6,0xd9,0x54,0x71,0x6f),
	LL(0xd0,0xd0,0x67,0xd0,0xce,0xb7,0xbd,0x1e),
	LL(0xed,0xed,0x93,0xed,0x3b,0x7e,0xc7,0xd6),
	LL(0xcc,0xcc,0x17,0xcc,0x2e,0xdb,0x85,0xe2),
	LL(0x42,0x42,0x15,0x42,0x2a,0x57,0x84,0x68),
	LL(0x98,0x98,0x5a,0x98,0xb4,0xc2,0x2d,0x2c),
	LL(0xa4,0xa4,0xaa,0xa4,0x49,0x0e,0x55,0xed),
	LL(0x28,0x28,0xa0,0x28,0x5d,0x88,0x50,0x75),
	LL(0x5c,0x5c,0x6d,0x5c,0xda,0x31,0xb8,0x86),
	LL(0xf8,0xf8,0xc7,0xf8,0x93,0x3f,0xed,0x6b),
	LL(0x86,0x86,0x22,0x86,0x44,0xa4,0x11,0xc2),
/*
 * Round constants follow, stored as raw bytes; note they coincide
 * with the leading bytes of the first table rows above.
 */
#define RC	(&(Cx.q[256*N]))
	0x18,0x23,0xc6,0xe8,0x87,0xb8,0x01,0x4f,	/* rc[ROUNDS] */
	0x36,0xa6,0xd2,0xf5,0x79,0x6f,0x91,0x52,
	0x60,0xbc,0x9b,0x8e,0xa3,0x0c,0x7b,0x35,
	0x1d,0xe0,0xd7,0xc2,0x2e,0x4b,0xfe,0x57,
	0x15,0x77,0x37,0xe5,0x9f,0xf0,0x4a,0xda,
	0x58,0xc9,0x29,0x0a,0xb1,0xa0,0x6b,0x85,
	0xbd,0x5d,0x10,0xf4,0xcb,0x3e,0x05,0x67,
	0xe4,0x27,0x41,0x8b,0xa7,0x7d,0x95,0xd8,
	0xfb,0xee,0x7c,0x66,0xdd,0x17,0x47,0x9e,
	0xca,0x2d,0xbf,0x07,0xad,0x5a,0x83,0x33
	}
};
456
/*
 * whirlpool_block - compress n consecutive 64-byte message blocks at
 * inp into the chaining value ctx->H, in place.
 *
 * Note n must be >= 1: the main loop is a do/while, so a zero count
 * would wrap around.  Three code paths exist: the table-driven
 * small-footprint loop, and two unrolled variants (memory-based K/S
 * for SMALL_REGISTER_BANK targets, eight 64-bit locals otherwise).
 */
void whirlpool_block(WHIRLPOOL_CTX *ctx,const void *inp,size_t n)
	{
	int	r;
	const u8 *p=inp;
	/* S is the state being encrypted, K the round key schedule;
	 * H aliases ctx->H so the union's byte view is available. */
	union	{ u64 q[8]; u8 c[64]; } S,K,*H=(void *)ctx->H.q;

#ifdef GO_FOR_MMX
	/* Dispatch to the MMX assembly routine (and return) when the
	 * CPU supports it; falls through to C code otherwise. */
	GO_FOR_MMX(ctx,inp,n);
#endif
							do {
#ifdef OPENSSL_SMALL_FOOTPRINT
	u64	L[8];
	int	i;

	/* Miyaguchi-Preneel input: K = H, S = H ^ message block. */
	for (i=0;i<64;i++)	S.c[i] = (K.c[i] = H->c[i]) ^ p[i];
	for (r=0;r<ROUNDS;r++)
		{
		/* Key schedule round: only word 0 absorbs RC[r]. */
		for (i=0;i<8;i++)
			{
			L[i]  = i ? 0 : RC[r];
			L[i] ^=	C0(K,i)       ^ C1(K,(i-1)&7) ^
				C2(K,(i-2)&7) ^ C3(K,(i-3)&7) ^
				C4(K,(i-4)&7) ^ C5(K,(i-5)&7) ^
				C6(K,(i-6)&7) ^ C7(K,(i-7)&7);
			}
		memcpy (K.q,L,64);
		/* State round: same transform keyed with the fresh K
		 * (L still holds K, so ^= adds the round key). */
		for (i=0;i<8;i++)
			{
			L[i] ^= C0(S,i)       ^ C1(S,(i-1)&7) ^
				C2(S,(i-2)&7) ^ C3(S,(i-3)&7) ^
				C4(S,(i-4)&7) ^ C5(S,(i-5)&7) ^
				C6(S,(i-6)&7) ^ C7(S,(i-7)&7);
			}
		memcpy (S.q,L,64);
		}
	/* Miyaguchi-Preneel feed-forward. */
	for (i=0;i<64;i++)	H->c[i] ^= S.c[i] ^ p[i];
#else
	u64	L0,L1,L2,L3,L4,L5,L6,L7;

#ifdef __STRICT_ALIGNMENT
	/* Misaligned input: go through S.c byte-wise first. */
	if ((size_t)p & 7)
		{
		memcpy (S.c,p,64);
		S.q[0] ^= (K.q[0] = H->q[0]);
		S.q[1] ^= (K.q[1] = H->q[1]);
		S.q[2] ^= (K.q[2] = H->q[2]);
		S.q[3] ^= (K.q[3] = H->q[3]);
		S.q[4] ^= (K.q[4] = H->q[4]);
		S.q[5] ^= (K.q[5] = H->q[5]);
		S.q[6] ^= (K.q[6] = H->q[6]);
		S.q[7] ^= (K.q[7] = H->q[7]);
		}
	else
#endif
		{
		const u64 *pa = (const u64*)p;
		S.q[0] = (K.q[0] = H->q[0]) ^ pa[0];
		S.q[1] = (K.q[1] = H->q[1]) ^ pa[1];
		S.q[2] = (K.q[2] = H->q[2]) ^ pa[2];
		S.q[3] = (K.q[3] = H->q[3]) ^ pa[3];
		S.q[4] = (K.q[4] = H->q[4]) ^ pa[4];
		S.q[5] = (K.q[5] = H->q[5]) ^ pa[5];
		S.q[6] = (K.q[6] = H->q[6]) ^ pa[6];
		S.q[7] = (K.q[7] = H->q[7]) ^ pa[7];
		}

	for(r=0;r<ROUNDS;r++)
		{
#ifdef SMALL_REGISTER_BANK
		/* Key schedule round into L0..L7, then commit to K. */
		L0 =	C0(K,0) ^ C1(K,7) ^ C2(K,6) ^ C3(K,5) ^
			C4(K,4) ^ C5(K,3) ^ C6(K,2) ^ C7(K,1) ^ RC[r];
		L1 =	C0(K,1) ^ C1(K,0) ^ C2(K,7) ^ C3(K,6) ^
			C4(K,5) ^ C5(K,4) ^ C6(K,3) ^ C7(K,2);
		L2 =	C0(K,2) ^ C1(K,1) ^ C2(K,0) ^ C3(K,7) ^
			C4(K,6) ^ C5(K,5) ^ C6(K,4) ^ C7(K,3);
		L3 =	C0(K,3) ^ C1(K,2) ^ C2(K,1) ^ C3(K,0) ^
			C4(K,7) ^ C5(K,6) ^ C6(K,5) ^ C7(K,4);
		L4 =	C0(K,4) ^ C1(K,3) ^ C2(K,2) ^ C3(K,1) ^
			C4(K,0) ^ C5(K,7) ^ C6(K,6) ^ C7(K,5);
		L5 =	C0(K,5) ^ C1(K,4) ^ C2(K,3) ^ C3(K,2) ^
			C4(K,1) ^ C5(K,0) ^ C6(K,7) ^ C7(K,6);
		L6 =	C0(K,6) ^ C1(K,5) ^ C2(K,4) ^ C3(K,3) ^
			C4(K,2) ^ C5(K,1) ^ C6(K,0) ^ C7(K,7);
		L7 =	C0(K,7) ^ C1(K,6) ^ C2(K,5) ^ C3(K,4) ^
			C4(K,3) ^ C5(K,2) ^ C6(K,1) ^ C7(K,0);

		K.q[0] = L0; K.q[1] = L1; K.q[2] = L2; K.q[3] = L3;
		K.q[4] = L4; K.q[5] = L5; K.q[6] = L6; K.q[7] = L7;

		/* State round: L already holds the round key. */
		L0 ^=	C0(S,0) ^ C1(S,7) ^ C2(S,6) ^ C3(S,5) ^
			C4(S,4) ^ C5(S,3) ^ C6(S,2) ^ C7(S,1);
		L1 ^=	C0(S,1) ^ C1(S,0) ^ C2(S,7) ^ C3(S,6) ^
			C4(S,5) ^ C5(S,4) ^ C6(S,3) ^ C7(S,2);
		L2 ^=	C0(S,2) ^ C1(S,1) ^ C2(S,0) ^ C3(S,7) ^
			C4(S,6) ^ C5(S,5) ^ C6(S,4) ^ C7(S,3);
		L3 ^=	C0(S,3) ^ C1(S,2) ^ C2(S,1) ^ C3(S,0) ^
			C4(S,7) ^ C5(S,6) ^ C6(S,5) ^ C7(S,4);
		L4 ^=	C0(S,4) ^ C1(S,3) ^ C2(S,2) ^ C3(S,1) ^
			C4(S,0) ^ C5(S,7) ^ C6(S,6) ^ C7(S,5);
		L5 ^=	C0(S,5) ^ C1(S,4) ^ C2(S,3) ^ C3(S,2) ^
			C4(S,1) ^ C5(S,0) ^ C6(S,7) ^ C7(S,6);
		L6 ^=	C0(S,6) ^ C1(S,5) ^ C2(S,4) ^ C3(S,3) ^
			C4(S,2) ^ C5(S,1) ^ C6(S,0) ^ C7(S,7);
		L7 ^=	C0(S,7) ^ C1(S,6) ^ C2(S,5) ^ C3(S,4) ^
			C4(S,3) ^ C5(S,2) ^ C6(S,1) ^ C7(S,0);

		S.q[0] = L0; S.q[1] = L1; S.q[2] = L2; S.q[3] = L3;
		S.q[4] = L4; S.q[5] = L5; S.q[6] = L6; S.q[7] = L7;
#else
		/* Column-major accumulation: one table row per source
		 * word, spread across all eight accumulators. */
		L0  = C0(K,0); L1  = C1(K,0); L2  = C2(K,0); L3  = C3(K,0);
		L4  = C4(K,0); L5  = C5(K,0); L6  = C6(K,0); L7  = C7(K,0);
		L0 ^= RC[r];

		L1 ^= C0(K,1); L2 ^= C1(K,1); L3 ^= C2(K,1); L4 ^= C3(K,1);
		L5 ^= C4(K,1); L6 ^= C5(K,1); L7 ^= C6(K,1); L0 ^= C7(K,1);

		L2 ^= C0(K,2); L3 ^= C1(K,2); L4 ^= C2(K,2); L5 ^= C3(K,2);
		L6 ^= C4(K,2); L7 ^= C5(K,2); L0 ^= C6(K,2); L1 ^= C7(K,2);

		L3 ^= C0(K,3); L4 ^= C1(K,3); L5 ^= C2(K,3); L6 ^= C3(K,3);
		L7 ^= C4(K,3); L0 ^= C5(K,3); L1 ^= C6(K,3); L2 ^= C7(K,3);

		L4 ^= C0(K,4); L5 ^= C1(K,4); L6 ^= C2(K,4); L7 ^= C3(K,4);
		L0 ^= C4(K,4); L1 ^= C5(K,4); L2 ^= C6(K,4); L3 ^= C7(K,4);

		L5 ^= C0(K,5); L6 ^= C1(K,5); L7 ^= C2(K,5); L0 ^= C3(K,5);
		L1 ^= C4(K,5); L2 ^= C5(K,5); L3 ^= C6(K,5); L4 ^= C7(K,5);

		L6 ^= C0(K,6); L7 ^= C1(K,6); L0 ^= C2(K,6); L1 ^= C3(K,6);
		L2 ^= C4(K,6); L3 ^= C5(K,6); L4 ^= C6(K,6); L5 ^= C7(K,6);

		L7 ^= C0(K,7); L0 ^= C1(K,7); L1 ^= C2(K,7); L2 ^= C3(K,7);
		L3 ^= C4(K,7); L4 ^= C5(K,7); L5 ^= C6(K,7); L6 ^= C7(K,7);

		K.q[0] = L0; K.q[1] = L1; K.q[2] = L2; K.q[3] = L3;
		K.q[4] = L4; K.q[5] = L5; K.q[6] = L6; K.q[7] = L7;

		/* State round keyed with the fresh K (in L0..L7). */
		L0 ^= C0(S,0); L1 ^= C1(S,0); L2 ^= C2(S,0); L3 ^= C3(S,0);
		L4 ^= C4(S,0); L5 ^= C5(S,0); L6 ^= C6(S,0); L7 ^= C7(S,0);

		L1 ^= C0(S,1); L2 ^= C1(S,1); L3 ^= C2(S,1); L4 ^= C3(S,1);
		L5 ^= C4(S,1); L6 ^= C5(S,1); L7 ^= C6(S,1); L0 ^= C7(S,1);

		L2 ^= C0(S,2); L3 ^= C1(S,2); L4 ^= C2(S,2); L5 ^= C3(S,2);
		L6 ^= C4(S,2); L7 ^= C5(S,2); L0 ^= C6(S,2); L1 ^= C7(S,2);

		L3 ^= C0(S,3); L4 ^= C1(S,3); L5 ^= C2(S,3); L6 ^= C3(S,3);
		L7 ^= C4(S,3); L0 ^= C5(S,3); L1 ^= C6(S,3); L2 ^= C7(S,3);

		L4 ^= C0(S,4); L5 ^= C1(S,4); L6 ^= C2(S,4); L7 ^= C3(S,4);
		L0 ^= C4(S,4); L1 ^= C5(S,4); L2 ^= C6(S,4); L3 ^= C7(S,4);

		L5 ^= C0(S,5); L6 ^= C1(S,5); L7 ^= C2(S,5); L0 ^= C3(S,5);
		L1 ^= C4(S,5); L2 ^= C5(S,5); L3 ^= C6(S,5); L4 ^= C7(S,5);

		L6 ^= C0(S,6); L7 ^= C1(S,6); L0 ^= C2(S,6); L1 ^= C3(S,6);
		L2 ^= C4(S,6); L3 ^= C5(S,6); L4 ^= C6(S,6); L5 ^= C7(S,6);

		L7 ^= C0(S,7); L0 ^= C1(S,7); L1 ^= C2(S,7); L2 ^= C3(S,7);
		L3 ^= C4(S,7); L4 ^= C5(S,7); L5 ^= C6(S,7); L6 ^= C7(S,7);

		S.q[0] = L0; S.q[1] = L1; S.q[2] = L2; S.q[3] = L3;
		S.q[4] = L4; S.q[5] = L5; S.q[6] = L6; S.q[7] = L7;
#endif
		}

	/* Miyaguchi-Preneel feed-forward: H ^= S ^ message block. */
#ifdef __STRICT_ALIGNMENT
	if ((size_t)p & 7)
		{
		int i;
		for(i=0;i<64;i++)	H->c[i] ^= S.c[i] ^ p[i];
		}
	else
#endif
		{
		const u64 *pa=(const u64 *)p;
		H->q[0] ^= S.q[0] ^ pa[0];
		H->q[1] ^= S.q[1] ^ pa[1];
		H->q[2] ^= S.q[2] ^ pa[2];
		H->q[3] ^= S.q[3] ^ pa[3];
		H->q[4] ^= S.q[4] ^ pa[4];
		H->q[5] ^= S.q[5] ^ pa[5];
		H->q[6] ^= S.q[6] ^ pa[6];
		H->q[7] ^= S.q[7] ^ pa[7];
		}
#endif
							p += 64;
							} while(--n);
	}
646
647int
648WHIRLPOOL_Init(WHIRLPOOL_CTX *c)
649{
650	memset (c, 0, sizeof(*c));
651	return (1);
652}
653LCRYPTO_ALIAS(WHIRLPOOL_Init);
654
655int
656WHIRLPOOL_Update(WHIRLPOOL_CTX *c, const void *_inp, size_t bytes)
657{
658	/* Well, largest suitable chunk size actually is
659	 * (1<<(sizeof(size_t)*8-3))-64, but below number
660	 * is large enough for not to care about excessive
661	 * calls to WHIRLPOOL_BitUpdate... */
662	size_t chunk = ((size_t)1) << (sizeof(size_t)*8 - 4);
663	const unsigned char *inp = _inp;
664
665	while (bytes >= chunk) {
666		WHIRLPOOL_BitUpdate(c, inp, chunk*8);
667		bytes -= chunk;
668		inp += chunk;
669	}
670	if (bytes)
671		WHIRLPOOL_BitUpdate(c, inp, bytes*8);
672
673	return (1);
674}
675LCRYPTO_ALIAS(WHIRLPOOL_Update);
676
/*
 * WHIRLPOOL_BitUpdate - absorb `bits` message bits from _inp.
 *
 * inpgap is the number of unused low-order bits in the final input
 * byte (message bits fill bytes from the high end, as the
 * inp[0]<<inpgap assembly below shows).  bitoff is the write position
 * in the context buffer c->data, in bits; bitrem is its sub-byte part.
 * Byte-aligned input on a byte-aligned context takes the fast
 * byte-oriented loop, which feeds whole 512-bit blocks directly to
 * whirlpool_block; everything else goes bit by bit, re-checking
 * (via `reconsider`) whether alignment has been regained.
 */
void
WHIRLPOOL_BitUpdate(WHIRLPOOL_CTX *c, const void *_inp, size_t bits)
{
	size_t		n;
	unsigned int	bitoff = c->bitoff,
	    bitrem = bitoff % 8,
	    inpgap = (8 - (unsigned int)bits % 8)&7;
	const unsigned char *inp = _inp;

	/* This 256-bit increment procedure relies on the size_t
	 * being natural size of CPU register, so that we don't
	 * have to mask the value in order to detect overflows. */
	c->bitlen[0] += bits;
	if (c->bitlen[0] < bits)	/* overflow */
	{
		/* Propagate the carry into the higher counter words. */
		n = 1;
		do {
			c->bitlen[n]++;
		} while (c->bitlen[n]==0 &&
		    ++n < (WHIRLPOOL_COUNTER/sizeof(size_t)));
	}

#ifndef OPENSSL_SMALL_FOOTPRINT
reconsider:
	if (inpgap==0 && bitrem==0)	/* byte-oriented loop */
	{
		while (bits) {
			/* Buffer empty and >= one full block pending:
			 * hash straight from the caller's buffer. */
			if (bitoff == 0 && (n = bits/WHIRLPOOL_BBLOCK)) {
				whirlpool_block(c, inp, n);
				inp += n*WHIRLPOOL_BBLOCK/8;
				bits %= WHIRLPOOL_BBLOCK;
			} else {
				unsigned int byteoff = bitoff/8;

				bitrem = WHIRLPOOL_BBLOCK - bitoff;/* re-use bitrem */
				if (bits >= bitrem) {
					/* Top up c->data and hash it. */
					bits -= bitrem;
					bitrem /= 8;
					memcpy(c->data + byteoff, inp, bitrem);
					inp += bitrem;
					whirlpool_block(c, c->data, 1);
					bitoff = 0;
				} else {
					/* Tail shorter than a block:
					 * just buffer it. */
					memcpy(c->data + byteoff, inp, bits/8);
					bitoff += (unsigned int)bits;
					bits = 0;
				}
				c->bitoff = bitoff;
			}
		}
	}
	else				/* bit-oriented loop */
#endif
	{
		/*
			   inp
			   |
			   +-------+-------+-------
			      |||||||||||||||||||||
			   +-------+-------+-------
		+-------+-------+-------+-------+-------
		||||||||||||||				c->data
		+-------+-------+-------+-------+-------
			|
			c->bitoff/8
		*/
		while (bits) {
			unsigned int	byteoff = bitoff/8;
			unsigned char	b;

#ifndef OPENSSL_SMALL_FOOTPRINT
			/* If the input's leading fragment exactly fills
			 * the buffer's partial byte, both streams become
			 * byte-aligned: absorb the fragment and retry the
			 * fast path. */
			if (bitrem == inpgap) {
				c->data[byteoff++] |= inp[0] & (0xff >> inpgap);
				inpgap = 8 - inpgap;
				bitoff += inpgap;  bitrem = 0;	/* bitoff%8 */
				bits   -= inpgap;  inpgap = 0;	/* bits%8   */
				inp++;
				if (bitoff == WHIRLPOOL_BBLOCK) {
					whirlpool_block(c, c->data, 1);
					bitoff = 0;
				}
				c->bitoff = bitoff;
				goto reconsider;
			} else
#endif
			if (bits >= 8) {
				/* Assemble the next 8 message bits.
				 * NOTE(review): when inpgap==0 this reads
				 * inp[1] (shifted out again by >>8) — a
				 * harmless-looking one-byte look-ahead
				 * inherited from upstream; confirm callers
				 * always provide it. */
				b = ((inp[0]<<inpgap) | (inp[1]>>(8 - inpgap)));
				b &= 0xff;
				if (bitrem)
					c->data[byteoff++] |= b >> bitrem;
				else
					c->data[byteoff++] = b;
				bitoff += 8;
				bits -= 8;
				inp++;
				if (bitoff >= WHIRLPOOL_BBLOCK) {
					whirlpool_block(c, c->data, 1);
					byteoff = 0;
					bitoff  %= WHIRLPOOL_BBLOCK;
				}
				/* Spill the bits that did not fit into
				 * the (possibly fresh) buffer byte. */
				if (bitrem)
					c->data[byteoff] = b << (8 - bitrem);
			}
			else	/* remaining less than 8 bits */
			{
				b = (inp[0]<<inpgap)&0xff;
				if (bitrem)
					c->data[byteoff++] |= b >> bitrem;
				else
					c->data[byteoff++] = b;
				bitoff += (unsigned int)bits;
				if (bitoff == WHIRLPOOL_BBLOCK) {
					whirlpool_block(c, c->data, 1);
					byteoff = 0;
					bitoff  %= WHIRLPOOL_BBLOCK;
				}
				if (bitrem)
					c->data[byteoff] = b << (8 - bitrem);
				bits = 0;
			}
			c->bitoff = bitoff;
		}
	}
}
801LCRYPTO_ALIAS(WHIRLPOOL_BitUpdate);
802
/*
 * WHIRLPOOL_Final - finish the hash: append a single 1 bit, pad with
 * zeros to the length field, append the 256-bit big-endian message
 * bit count, process the final block(s) and copy out the digest.
 *
 * Returns 1 and scrubs the context when md is non-NULL.  Returns 0
 * when md is NULL; note the context has by then already absorbed the
 * padding and cannot be updated further.
 */
int
WHIRLPOOL_Final(unsigned char *md, WHIRLPOOL_CTX *c)
{
	unsigned int	bitoff = c->bitoff,
	    byteoff = bitoff/8;
	size_t		i, j, v;
	unsigned char  *p;

	/* Set the first free bit (just after the message bits). */
	bitoff %= 8;
	if (bitoff)
		c->data[byteoff] |= 0x80 >> bitoff;
	else
		c->data[byteoff] = 0x80;
	byteoff++;

	/* pad with zeros */
	if (byteoff > (WHIRLPOOL_BBLOCK/8 - WHIRLPOOL_COUNTER)) {
		/* No room left for the counter: flush this block first. */
		if (byteoff < WHIRLPOOL_BBLOCK/8)
			memset(&c->data[byteoff], 0, WHIRLPOOL_BBLOCK/8 - byteoff);
		whirlpool_block(c, c->data, 1);
		byteoff = 0;
	}
	if (byteoff < (WHIRLPOOL_BBLOCK/8 - WHIRLPOOL_COUNTER))
		memset(&c->data[byteoff], 0,
		    (WHIRLPOOL_BBLOCK/8 - WHIRLPOOL_COUNTER) - byteoff);
	/* smash 256-bit c->bitlen in big-endian order */
	p = &c->data[WHIRLPOOL_BBLOCK/8-1];	/* last byte in c->data */
	for (i = 0; i < WHIRLPOOL_COUNTER/sizeof(size_t); i++)
		for (v = c->bitlen[i], j = 0; j < sizeof(size_t); j++, v >>= 8)
			*p-- = (unsigned char)(v&0xff);

	whirlpool_block(c, c->data, 1);

	if (md)	{
		memcpy(md, c->H.c, WHIRLPOOL_DIGEST_LENGTH);
		/* Scrub key material / state before returning. */
		memset(c, 0, sizeof(*c));
		return (1);
	}
	return (0);
}
843LCRYPTO_ALIAS(WHIRLPOOL_Final);
844
845unsigned char *
846WHIRLPOOL(const void *inp, size_t bytes, unsigned char *md)
847{
848	WHIRLPOOL_CTX ctx;
849
850	WHIRLPOOL_Init(&ctx);
851	WHIRLPOOL_Update(&ctx, inp, bytes);
852	WHIRLPOOL_Final(md, &ctx);
853	return (md);
854}
855LCRYPTO_ALIAS(WHIRLPOOL);
856