/* $OpenBSD: sha512.c,v 1.11 2014/07/09 16:06:13 miod Exp $ */
/* ====================================================================
 * Copyright (c) 2004 The OpenSSL Project.  All rights reserved
 * according to the OpenSSL license [found in ../../LICENSE].
 * ====================================================================
 */
#include <openssl/opensslconf.h>
#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
/*
 * IMPLEMENTATION NOTES.
 *
 * As you might have noticed, the 32-bit hash algorithms:
 *
 * - permit SHA_LONG to be wider than 32 bits (as is the case on CRAY);
 * - in their optimized versions implement two transform functions: one
 *   operating on [aligned] data in host byte order and one on data in
 *   input stream byte order;
 * - share a common byte-order-neutral collector and padding function
 *   implementation, ../md32_common.h.
 *
 * None of the above applies to this SHA-512 implementation. The reasons
 * [in reverse order] are:
 *
 * - it is the only 64-bit hash algorithm at the moment of this writing,
 *   so there is no need for a common collector/padding implementation
 *   [yet];
 * - by supporting only one transform function [which operates on
 *   *aligned* data in input stream byte order, big-endian in this case]
 *   we minimize the maintenance burden in two ways: a) the
 *   collector/padding function is simpler; b) there is only one
 *   transform function to stare at;
 * - SHA_LONG64 is required to be exactly 64 bits wide so that a number
 *   of optimizations can be applied to mitigate potential performance
 *   penalties caused by the previous design decision.
 *
 * Caveat lector.
 *
 * The implementation relies on the fact that "long long" is 64 bits wide
 * on both 32- and 64-bit platforms. If some compiler vendor comes up with
 * a 128-bit long long, an adjustment to sha.h would be required. As this
 * implementation relies on a 64-bit integer type, it is entirely
 * inappropriate for platforms which don't support one, most notably
 * 16-bit platforms.
 *					<appro@fy.chalmers.se>
 */
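
/*
 * Illustrative sketch (not part of the original file, kept out of the
 * build by the "#if 0" guard) of the assumption spelled out above: the
 * typedef picks a negative array size, and therefore fails to compile,
 * whenever SHA_LONG64 is not exactly 64 bits wide.  The typedef name is
 * purely hypothetical.
 */
#if 0
#include <openssl/sha.h>

/* Compile-time assertion: sizeof(SHA_LONG64) must be 8 bytes. */
typedef char sha_long64_must_be_64_bits[(sizeof(SHA_LONG64) == 8) ? 1 : -1];
#endif
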
#include <stdlib.h>
#include <string.h>
#include <machine/endian.h>

#include <openssl/crypto.h>
#include <openssl/sha.h>
#include <openssl/opensslv.h>

#include "cryptlib.h"

#if !defined(__STRICT_ALIGNMENT) || defined(SHA512_ASM)
#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
#endif

int SHA384_Init(SHA512_CTX *c)
	{
	c->h[0]=U64(0xcbbb9d5dc1059ed8);
	c->h[1]=U64(0x629a292a367cd507);
	c->h[2]=U64(0x9159015a3070dd17);
	c->h[3]=U64(0x152fecd8f70e5939);
	c->h[4]=U64(0x67332667ffc00b31);
	c->h[5]=U64(0x8eb44a8768581511);
	c->h[6]=U64(0xdb0c2e0d64f98fa7);
	c->h[7]=U64(0x47b5481dbefa4fa4);

        c->Nl=0;        c->Nh=0;
        c->num=0;       c->md_len=SHA384_DIGEST_LENGTH;
        return 1;
	}

int SHA512_Init(SHA512_CTX *c)
	{
	c->h[0]=U64(0x6a09e667f3bcc908);
	c->h[1]=U64(0xbb67ae8584caa73b);
	c->h[2]=U64(0x3c6ef372fe94f82b);
	c->h[3]=U64(0xa54ff53a5f1d36f1);
	c->h[4]=U64(0x510e527fade682d1);
	c->h[5]=U64(0x9b05688c2b3e6c1f);
	c->h[6]=U64(0x1f83d9abfb41bd6b);
	c->h[7]=U64(0x5be0cd19137e2179);

        c->Nl=0;        c->Nh=0;
        c->num=0;       c->md_len=SHA512_DIGEST_LENGTH;
        return 1;
	}

#ifndef SHA512_ASM
static
#endif
void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num);

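/*
 * FIPS 180-4 padding, as implemented below: append a single 0x80 byte,
 * zero-fill up to the last 16 bytes of the block (spilling into an extra
 * block if fewer than 16 bytes remain), store the 128-bit message length
 * in bits (c->Nh:c->Nl) big-endian in those last 16 bytes, and run the
 * transform one final time.
 */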
int SHA512_Final (unsigned char *md, SHA512_CTX *c)
	{
	unsigned char *p=(unsigned char *)c->u.p;
	size_t n=c->num;

	p[n]=0x80;	/* There is always room for one */
	n++;
	if (n > (sizeof(c->u)-16))
		memset (p+n,0,sizeof(c->u)-n), n=0,
		sha512_block_data_order (c,p,1);

	memset (p+n,0,sizeof(c->u)-16-n);
#if BYTE_ORDER == BIG_ENDIAN
	c->u.d[SHA_LBLOCK-2] = c->Nh;
	c->u.d[SHA_LBLOCK-1] = c->Nl;
#else
	p[sizeof(c->u)-1]  = (unsigned char)(c->Nl);
	p[sizeof(c->u)-2]  = (unsigned char)(c->Nl>>8);
	p[sizeof(c->u)-3]  = (unsigned char)(c->Nl>>16);
	p[sizeof(c->u)-4]  = (unsigned char)(c->Nl>>24);
	p[sizeof(c->u)-5]  = (unsigned char)(c->Nl>>32);
	p[sizeof(c->u)-6]  = (unsigned char)(c->Nl>>40);
	p[sizeof(c->u)-7]  = (unsigned char)(c->Nl>>48);
	p[sizeof(c->u)-8]  = (unsigned char)(c->Nl>>56);
	p[sizeof(c->u)-9]  = (unsigned char)(c->Nh);
	p[sizeof(c->u)-10] = (unsigned char)(c->Nh>>8);
	p[sizeof(c->u)-11] = (unsigned char)(c->Nh>>16);
	p[sizeof(c->u)-12] = (unsigned char)(c->Nh>>24);
	p[sizeof(c->u)-13] = (unsigned char)(c->Nh>>32);
	p[sizeof(c->u)-14] = (unsigned char)(c->Nh>>40);
	p[sizeof(c->u)-15] = (unsigned char)(c->Nh>>48);
	p[sizeof(c->u)-16] = (unsigned char)(c->Nh>>56);
#endif

	sha512_block_data_order (c,p,1);

	if (md==0) return 0;

	switch (c->md_len)
		{
		/* Let compiler decide if it's appropriate to unroll... */
		case SHA384_DIGEST_LENGTH:
			for (n=0;n<SHA384_DIGEST_LENGTH/8;n++)
				{
				SHA_LONG64 t = c->h[n];

				*(md++)	= (unsigned char)(t>>56);
				*(md++)	= (unsigned char)(t>>48);
				*(md++)	= (unsigned char)(t>>40);
				*(md++)	= (unsigned char)(t>>32);
				*(md++)	= (unsigned char)(t>>24);
				*(md++)	= (unsigned char)(t>>16);
				*(md++)	= (unsigned char)(t>>8);
				*(md++)	= (unsigned char)(t);
				}
			break;
		case SHA512_DIGEST_LENGTH:
			for (n=0;n<SHA512_DIGEST_LENGTH/8;n++)
				{
				SHA_LONG64 t = c->h[n];

				*(md++)	= (unsigned char)(t>>56);
				*(md++)	= (unsigned char)(t>>48);
				*(md++)	= (unsigned char)(t>>40);
				*(md++)	= (unsigned char)(t>>32);
				*(md++)	= (unsigned char)(t>>24);
				*(md++)	= (unsigned char)(t>>16);
				*(md++)	= (unsigned char)(t>>8);
				*(md++)	= (unsigned char)(t);
				}
			break;
		/* ... as well as make sure md_len is not abused. */
		default:	return 0;
		}

	return 1;
	}

int SHA384_Final (unsigned char *md,SHA512_CTX *c)
{   return SHA512_Final (md,c);   }

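/*
 * SHA512_Update keeps the running message length, in bits, as the 128-bit
 * value c->Nh:c->Nl: the low 64 bits of len*8 are added to Nl with a
 * manual carry into Nh, and when size_t is 64 bits wide the bits shifted
 * out of len<<3 (i.e. len>>61) are added to Nh as well.  Partial blocks
 * are collected in c->u.p; on platforms that cannot load unaligned 64-bit
 * words, unaligned bulk input is also staged through that buffer.
 */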
int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len)
	{
	SHA_LONG64	l;
	unsigned char  *p=c->u.p;
	const unsigned char *data=(const unsigned char *)_data;

	if (len==0) return  1;

	l = (c->Nl+(((SHA_LONG64)len)<<3))&U64(0xffffffffffffffff);
	if (l < c->Nl)		c->Nh++;
	if (sizeof(len)>=8)	c->Nh+=(((SHA_LONG64)len)>>61);
	c->Nl=l;

	if (c->num != 0)
		{
		size_t n = sizeof(c->u) - c->num;

		if (len < n)
			{
			memcpy (p+c->num,data,len), c->num += (unsigned int)len;
			return 1;
			}
		else	{
			memcpy (p+c->num,data,n), c->num = 0;
			len-=n, data+=n;
			sha512_block_data_order (c,p,1);
			}
		}

	if (len >= sizeof(c->u))
		{
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
		if ((size_t)data%sizeof(c->u.d[0]) != 0)
			while (len >= sizeof(c->u))
				memcpy (p,data,sizeof(c->u)),
				sha512_block_data_order (c,p,1),
				len  -= sizeof(c->u),
				data += sizeof(c->u);
		else
#endif
			sha512_block_data_order (c,data,len/sizeof(c->u)),
			data += len,
			len  %= sizeof(c->u),
			data -= len;
		}

	if (len != 0)	memcpy (p,data,len), c->num = (int)len;

	return 1;
	}

int SHA384_Update (SHA512_CTX *c, const void *data, size_t len)
{   return SHA512_Update (c,data,len);   }

void SHA512_Transform (SHA512_CTX *c, const unsigned char *data)
	{
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
	if ((size_t)data%sizeof(c->u.d[0]) != 0)
		memcpy(c->u.p,data,sizeof(c->u.p)),
		data = c->u.p;
#endif
	sha512_block_data_order (c,data,1);
	}

unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
	{
	SHA512_CTX c;
	static unsigned char m[SHA384_DIGEST_LENGTH];

	if (md == NULL) md=m;
	SHA384_Init(&c);
	SHA512_Update(&c,d,n);
	SHA512_Final(md,&c);
	OPENSSL_cleanse(&c,sizeof(c));
	return(md);
	}

unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
	{
	SHA512_CTX c;
	static unsigned char m[SHA512_DIGEST_LENGTH];

	if (md == NULL) md=m;
	SHA512_Init(&c);
	SHA512_Update(&c,d,n);
	SHA512_Final(md,&c);
	OPENSSL_cleanse(&c,sizeof(c));
	return(md);
	}
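
/*
 * A minimal usage sketch (not part of the original file, kept out of the
 * build by the "#if 0" guard): it hashes a short buffer once with the
 * one-shot SHA512() helper and once with the incremental
 * SHA512_Init/SHA512_Update/SHA512_Final interface, then prints the
 * digest.  The message and variable names are purely illustrative.
 */
#if 0
#include <stdio.h>
#include <string.h>
#include <openssl/sha.h>

int
main(void)
	{
	static const unsigned char msg[] = "abc";
	unsigned char one_shot[SHA512_DIGEST_LENGTH];
	unsigned char streamed[SHA512_DIGEST_LENGTH];
	SHA512_CTX ctx;
	int i;

	/* One-shot interface. */
	SHA512(msg, sizeof(msg) - 1, one_shot);

	/* Incremental interface; Update may be called any number of times. */
	SHA512_Init(&ctx);
	SHA512_Update(&ctx, msg, sizeof(msg) - 1);
	SHA512_Final(streamed, &ctx);

	for (i = 0; i < SHA512_DIGEST_LENGTH; i++)
		printf("%02x", one_shot[i]);
	printf("\n%s\n", memcmp(one_shot, streamed, sizeof(one_shot)) == 0 ?
	    "digests match" : "digests differ");
	return 0;
	}
#endif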

#ifndef SHA512_ASM
static const SHA_LONG64 K512[80] = {
        U64(0x428a2f98d728ae22),U64(0x7137449123ef65cd),
        U64(0xb5c0fbcfec4d3b2f),U64(0xe9b5dba58189dbbc),
        U64(0x3956c25bf348b538),U64(0x59f111f1b605d019),
        U64(0x923f82a4af194f9b),U64(0xab1c5ed5da6d8118),
        U64(0xd807aa98a3030242),U64(0x12835b0145706fbe),
        U64(0x243185be4ee4b28c),U64(0x550c7dc3d5ffb4e2),
        U64(0x72be5d74f27b896f),U64(0x80deb1fe3b1696b1),
        U64(0x9bdc06a725c71235),U64(0xc19bf174cf692694),
        U64(0xe49b69c19ef14ad2),U64(0xefbe4786384f25e3),
        U64(0x0fc19dc68b8cd5b5),U64(0x240ca1cc77ac9c65),
        U64(0x2de92c6f592b0275),U64(0x4a7484aa6ea6e483),
        U64(0x5cb0a9dcbd41fbd4),U64(0x76f988da831153b5),
        U64(0x983e5152ee66dfab),U64(0xa831c66d2db43210),
        U64(0xb00327c898fb213f),U64(0xbf597fc7beef0ee4),
        U64(0xc6e00bf33da88fc2),U64(0xd5a79147930aa725),
        U64(0x06ca6351e003826f),U64(0x142929670a0e6e70),
        U64(0x27b70a8546d22ffc),U64(0x2e1b21385c26c926),
        U64(0x4d2c6dfc5ac42aed),U64(0x53380d139d95b3df),
        U64(0x650a73548baf63de),U64(0x766a0abb3c77b2a8),
        U64(0x81c2c92e47edaee6),U64(0x92722c851482353b),
        U64(0xa2bfe8a14cf10364),U64(0xa81a664bbc423001),
        U64(0xc24b8b70d0f89791),U64(0xc76c51a30654be30),
        U64(0xd192e819d6ef5218),U64(0xd69906245565a910),
        U64(0xf40e35855771202a),U64(0x106aa07032bbd1b8),
        U64(0x19a4c116b8d2d0c8),U64(0x1e376c085141ab53),
        U64(0x2748774cdf8eeb99),U64(0x34b0bcb5e19b48a8),
        U64(0x391c0cb3c5c95a63),U64(0x4ed8aa4ae3418acb),
        U64(0x5b9cca4f7763e373),U64(0x682e6ff3d6b2b8a3),
        U64(0x748f82ee5defb2fc),U64(0x78a5636f43172f60),
        U64(0x84c87814a1f0ab72),U64(0x8cc702081a6439ec),
        U64(0x90befffa23631e28),U64(0xa4506cebde82bde9),
        U64(0xbef9a3f7b2c67915),U64(0xc67178f2e372532b),
        U64(0xca273eceea26619c),U64(0xd186b8c721c0c207),
        U64(0xeada7dd6cde0eb1e),U64(0xf57d4f7fee6ed178),
        U64(0x06f067aa72176fba),U64(0x0a637dc5a2c898a6),
        U64(0x113f9804bef90dae),U64(0x1b710b35131c471b),
        U64(0x28db77f523047d84),U64(0x32caab7b40c72493),
        U64(0x3c9ebe0a15c9bebc),U64(0x431d67c49c100d4c),
        U64(0x4cc5d4becb3e42b6),U64(0x597f299cfc657e2a),
        U64(0x5fcb6fab3ad6faec),U64(0x6c44198c4a475817) };

#if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
# if defined(__x86_64) || defined(__x86_64__)
#  define ROTR(a,n)	({ SHA_LONG64 ret;		\
				asm ("rorq %1,%0"	\
				: "=r"(ret)		\
				: "J"(n),"0"(a)		\
				: "cc"); ret;		})
#  define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));	\
				asm ("bswapq	%0"		\
				: "=r"(ret)			\
				: "0"(ret)); ret;		})
# elif (defined(__i386) || defined(__i386__))
#  if defined(I386_ONLY)
#   define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
			 unsigned int hi=p[0],lo=p[1];		\
				asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
				    "roll $16,%%eax; roll $16,%%edx; "\
				    "xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
				: "=a"(lo),"=d"(hi)		\
				: "0"(lo),"1"(hi) : "cc");	\
				((SHA_LONG64)hi)<<32|lo;	})
#  else
#   define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
			 unsigned int hi=p[0],lo=p[1];		\
				asm ("bswapl %0; bswapl %1;"	\
				: "=r"(lo),"=r"(hi)		\
				: "0"(lo),"1"(hi));		\
				((SHA_LONG64)hi)<<32|lo;	})
#  endif
# elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
#  define ROTR(a,n)	({ SHA_LONG64 ret;		\
				asm ("rotrdi %0,%1,%2"	\
				: "=r"(ret)		\
				: "r"(a),"K"(n)); ret;	})
# endif
#endif

#ifndef PULL64
#define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8))
#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
#endif

#ifndef ROTR
#define ROTR(x,s)	(((x)>>(s)) | ((x)<<(64-(s))))
#endif

#define Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
#define Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
#define sigma0(x)	(ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
#define sigma1(x)	(ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))

#define Ch(x,y,z)	(((x) & (y)) ^ ((~(x)) & (z)))
#define Maj(x,y,z)	(((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
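
/*
 * The macros above are the SHA-384/SHA-512 functions of FIPS 180-4 in its
 * notation: Sigma0/Sigma1 are the big Sigma functions applied to the
 * working variables a and e (rotations by 28/34/39 and 14/18/41), sigma0
 * and sigma1 are the small sigma functions of the message schedule
 * (rotations by 1/8 plus a shift by 7, and 19/61 plus a shift by 6), and
 * Ch/Maj are the usual choose and majority functions.
 */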


#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
/*
 * This code should give better results on 32-bit CPUs with fewer than
 * ~24 registers, both size- and performance-wise...
 */
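/*
 * How the rolling window below works: F starts at X+80 and is decremented
 * once per round, so the previous rounds' a and e (stored into F[0] and
 * F[4]) reappear as this round's b..d and f..h at F[1]..F[3] and
 * F[5]..F[7], and the message-schedule word saved in F[8] shows up at
 * F[8+k] k rounds later; F[8+16], F[8+16-9], F[8+16-14] and F[8+16-1] are
 * therefore W[i-16], W[i-7], W[i-2] and W[i-15] of the usual schedule
 * recurrence.  This avoids shuffling eight 64-bit working variables every
 * round.
 */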
static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
	{
	const SHA_LONG64 *W=in;
	SHA_LONG64	A,E,T;
	SHA_LONG64	X[9+80],*F;
	int i;

			while (num--) {

	F    = X+80;
	A    = ctx->h[0];	F[1] = ctx->h[1];
	F[2] = ctx->h[2];	F[3] = ctx->h[3];
	E    = ctx->h[4];	F[5] = ctx->h[5];
	F[6] = ctx->h[6];	F[7] = ctx->h[7];

	for (i=0;i<16;i++,F--)
		{
		T = PULL64(W[i]);
		F[0] = A;
		F[4] = E;
		F[8] = T;
		T   += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
		E    = F[3] + T;
		A    = T + Sigma0(A) + Maj(A,F[1],F[2]);
		}

	for (;i<80;i++,F--)
		{
		T    = sigma0(F[8+16-1]);
		T   += sigma1(F[8+16-14]);
		T   += F[8+16] + F[8+16-9];

		F[0] = A;
		F[4] = E;
		F[8] = T;
		T   += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
		E    = F[3] + T;
		A    = T + Sigma0(A) + Maj(A,F[1],F[2]);
		}

	ctx->h[0] += A;		ctx->h[1] += F[1];
	ctx->h[2] += F[2];	ctx->h[3] += F[3];
	ctx->h[4] += E;		ctx->h[5] += F[5];
	ctx->h[6] += F[6];	ctx->h[7] += F[7];

			W+=SHA_LBLOCK;
			}
	}

#elif defined(OPENSSL_SMALL_FOOTPRINT)

static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
	{
	const SHA_LONG64 *W=in;
	SHA_LONG64	a,b,c,d,e,f,g,h,s0,s1,T1,T2;
	SHA_LONG64	X[16];
	int i;

			while (num--) {

	a = ctx->h[0];	b = ctx->h[1];	c = ctx->h[2];	d = ctx->h[3];
	e = ctx->h[4];	f = ctx->h[5];	g = ctx->h[6];	h = ctx->h[7];

	for (i=0;i<16;i++)
		{
#if BYTE_ORDER == BIG_ENDIAN
		T1 = X[i] = W[i];
#else
		T1 = X[i] = PULL64(W[i]);
#endif
		T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
		T2 = Sigma0(a) + Maj(a,b,c);
		h = g;	g = f;	f = e;	e = d + T1;
		d = c;	c = b;	b = a;	a = T1 + T2;
		}

	for (;i<80;i++)
		{
		s0 = X[(i+1)&0x0f];	s0 = sigma0(s0);
		s1 = X[(i+14)&0x0f];	s1 = sigma1(s1);

		T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
		T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
		T2 = Sigma0(a) + Maj(a,b,c);
		h = g;	g = f;	f = e;	e = d + T1;
		d = c;	c = b;	b = a;	a = T1 + T2;
		}

	ctx->h[0] += a;	ctx->h[1] += b;	ctx->h[2] += c;	ctx->h[3] += d;
	ctx->h[4] += e;	ctx->h[5] += f;	ctx->h[6] += g;	ctx->h[7] += h;

			W+=SHA_LBLOCK;
			}
	}

#else

#define	ROUND_00_15(i,a,b,c,d,e,f,g,h)		do {	\
	T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];	\
	h = Sigma0(a) + Maj(a,b,c);			\
	d += T1;	h += T1;		} while (0)

#define	ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)	do {	\
	s0 = X[(j+1)&0x0f];	s0 = sigma0(s0);	\
	s1 = X[(j+14)&0x0f];	s1 = sigma1(s1);	\
	T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];	\
	ROUND_00_15(i+j,a,b,c,d,e,f,g,h);		} while (0)

static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
	{
	const SHA_LONG64 *W=in;
	SHA_LONG64	a,b,c,d,e,f,g,h,s0,s1,T1;
	SHA_LONG64	X[16];
	int i;

			while (num--) {

	a = ctx->h[0];	b = ctx->h[1];	c = ctx->h[2];	d = ctx->h[3];
	e = ctx->h[4];	f = ctx->h[5];	g = ctx->h[6];	h = ctx->h[7];

#if BYTE_ORDER == BIG_ENDIAN
	T1 = X[0] = W[0];	ROUND_00_15(0,a,b,c,d,e,f,g,h);
	T1 = X[1] = W[1];	ROUND_00_15(1,h,a,b,c,d,e,f,g);
	T1 = X[2] = W[2];	ROUND_00_15(2,g,h,a,b,c,d,e,f);
	T1 = X[3] = W[3];	ROUND_00_15(3,f,g,h,a,b,c,d,e);
	T1 = X[4] = W[4];	ROUND_00_15(4,e,f,g,h,a,b,c,d);
	T1 = X[5] = W[5];	ROUND_00_15(5,d,e,f,g,h,a,b,c);
	T1 = X[6] = W[6];	ROUND_00_15(6,c,d,e,f,g,h,a,b);
	T1 = X[7] = W[7];	ROUND_00_15(7,b,c,d,e,f,g,h,a);
	T1 = X[8] = W[8];	ROUND_00_15(8,a,b,c,d,e,f,g,h);
	T1 = X[9] = W[9];	ROUND_00_15(9,h,a,b,c,d,e,f,g);
	T1 = X[10] = W[10];	ROUND_00_15(10,g,h,a,b,c,d,e,f);
	T1 = X[11] = W[11];	ROUND_00_15(11,f,g,h,a,b,c,d,e);
	T1 = X[12] = W[12];	ROUND_00_15(12,e,f,g,h,a,b,c,d);
	T1 = X[13] = W[13];	ROUND_00_15(13,d,e,f,g,h,a,b,c);
	T1 = X[14] = W[14];	ROUND_00_15(14,c,d,e,f,g,h,a,b);
	T1 = X[15] = W[15];	ROUND_00_15(15,b,c,d,e,f,g,h,a);
#else
	T1 = X[0]  = PULL64(W[0]);	ROUND_00_15(0,a,b,c,d,e,f,g,h);
	T1 = X[1]  = PULL64(W[1]);	ROUND_00_15(1,h,a,b,c,d,e,f,g);
	T1 = X[2]  = PULL64(W[2]);	ROUND_00_15(2,g,h,a,b,c,d,e,f);
	T1 = X[3]  = PULL64(W[3]);	ROUND_00_15(3,f,g,h,a,b,c,d,e);
	T1 = X[4]  = PULL64(W[4]);	ROUND_00_15(4,e,f,g,h,a,b,c,d);
	T1 = X[5]  = PULL64(W[5]);	ROUND_00_15(5,d,e,f,g,h,a,b,c);
	T1 = X[6]  = PULL64(W[6]);	ROUND_00_15(6,c,d,e,f,g,h,a,b);
	T1 = X[7]  = PULL64(W[7]);	ROUND_00_15(7,b,c,d,e,f,g,h,a);
	T1 = X[8]  = PULL64(W[8]);	ROUND_00_15(8,a,b,c,d,e,f,g,h);
	T1 = X[9]  = PULL64(W[9]);	ROUND_00_15(9,h,a,b,c,d,e,f,g);
	T1 = X[10] = PULL64(W[10]);	ROUND_00_15(10,g,h,a,b,c,d,e,f);
	T1 = X[11] = PULL64(W[11]);	ROUND_00_15(11,f,g,h,a,b,c,d,e);
	T1 = X[12] = PULL64(W[12]);	ROUND_00_15(12,e,f,g,h,a,b,c,d);
	T1 = X[13] = PULL64(W[13]);	ROUND_00_15(13,d,e,f,g,h,a,b,c);
	T1 = X[14] = PULL64(W[14]);	ROUND_00_15(14,c,d,e,f,g,h,a,b);
	T1 = X[15] = PULL64(W[15]);	ROUND_00_15(15,b,c,d,e,f,g,h,a);
#endif

	for (i=16;i<80;i+=16)
		{
		ROUND_16_80(i, 0,a,b,c,d,e,f,g,h,X);
		ROUND_16_80(i, 1,h,a,b,c,d,e,f,g,X);
		ROUND_16_80(i, 2,g,h,a,b,c,d,e,f,X);
		ROUND_16_80(i, 3,f,g,h,a,b,c,d,e,X);
		ROUND_16_80(i, 4,e,f,g,h,a,b,c,d,X);
		ROUND_16_80(i, 5,d,e,f,g,h,a,b,c,X);
		ROUND_16_80(i, 6,c,d,e,f,g,h,a,b,X);
		ROUND_16_80(i, 7,b,c,d,e,f,g,h,a,X);
		ROUND_16_80(i, 8,a,b,c,d,e,f,g,h,X);
		ROUND_16_80(i, 9,h,a,b,c,d,e,f,g,X);
		ROUND_16_80(i,10,g,h,a,b,c,d,e,f,X);
		ROUND_16_80(i,11,f,g,h,a,b,c,d,e,X);
		ROUND_16_80(i,12,e,f,g,h,a,b,c,d,X);
		ROUND_16_80(i,13,d,e,f,g,h,a,b,c,X);
		ROUND_16_80(i,14,c,d,e,f,g,h,a,b,X);
		ROUND_16_80(i,15,b,c,d,e,f,g,h,a,X);
		}

	ctx->h[0] += a;	ctx->h[1] += b;	ctx->h[2] += c;	ctx->h[3] += d;
	ctx->h[4] += e;	ctx->h[5] += f;	ctx->h[6] += g;	ctx->h[7] += h;

			W+=SHA_LBLOCK;
			}
	}

#endif

#endif /* SHA512_ASM */

#endif /* !OPENSSL_NO_SHA512 */