1/* crypto/sha/sha512.c */
2/* ====================================================================
3 * Copyright (c) 2004 The OpenSSL Project.  All rights reserved
4 * according to the OpenSSL license [found in ../../LICENSE].
5 * ====================================================================
6 */
7#include <openssl/opensslconf.h>
8#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
9/*
10 * IMPLEMENTATION NOTES.
11 *
12 * As you might have noticed 32-bit hash algorithms:
13 *
14 * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
15 * - optimized versions implement two transform functions: one operating
16 *   on [aligned] data in host byte order and one - on data in input
17 *   stream byte order;
18 * - share common byte-order neutral collector and padding function
19 *   implementations, ../md32_common.h;
20 *
 * None of the above applies to this SHA-512 implementation. The reasons
 * [in reverse order] are:
23 *
 * - it's the only 64-bit hash algorithm at the time of this writing, so
 *   there is no need for a common collector/padding implementation [yet];
26 * - by supporting only one transform function [which operates on
27 *   *aligned* data in input stream byte order, big-endian in this case]
28 *   we minimize burden of maintenance in two ways: a) collector/padding
29 *   function is simpler; b) only one transform function to stare at;
30 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
31 *   apply a number of optimizations to mitigate potential performance
32 *   penalties caused by previous design decision;
33 *
34 * Caveat lector.
35 *
36 * Implementation relies on the fact that "long long" is 64-bit on
37 * both 32- and 64-bit platforms. If some compiler vendor comes up
38 * with 128-bit long long, adjustment to sha.h would be required.
39 * As this implementation relies on 64-bit integer type, it's totally
40 * inappropriate for platforms which don't support it, most notably
41 * 16-bit platforms.
42 *					<appro@fy.chalmers.se>
43 */
44#include <stdlib.h>
45#include <string.h>
46
47#include <openssl/crypto.h>
48#include <openssl/sha.h>
49#include <openssl/opensslv.h>
50
51#include <openssl/local/cryptlib.h>
52
53const char SHA512_version[]="SHA-512" OPENSSL_VERSION_PTEXT;
54
55#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
56    defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
57    defined(__s390__) || defined(__s390x__) || \
58    defined(SHA512_ASM)
59#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
60#endif
61
62int SHA384_Init (SHA512_CTX *c)
63	{
64#if defined(SHA512_ASM) && (defined(__arm__) || defined(__arm))
65	/* maintain dword order required by assembler module */
66	unsigned int *h = (unsigned int *)c->h;
67
68	h[0]  = 0xcbbb9d5d; h[1]  = 0xc1059ed8;
69	h[2]  = 0x629a292a; h[3]  = 0x367cd507;
70	h[4]  = 0x9159015a; h[5]  = 0x3070dd17;
71	h[6]  = 0x152fecd8; h[7]  = 0xf70e5939;
72	h[8]  = 0x67332667; h[9]  = 0xffc00b31;
73	h[10] = 0x8eb44a87; h[11] = 0x68581511;
74	h[12] = 0xdb0c2e0d; h[13] = 0x64f98fa7;
75	h[14] = 0x47b5481d; h[15] = 0xbefa4fa4;
76#else
77	c->h[0]=U64(0xcbbb9d5dc1059ed8);
78	c->h[1]=U64(0x629a292a367cd507);
79	c->h[2]=U64(0x9159015a3070dd17);
80	c->h[3]=U64(0x152fecd8f70e5939);
81	c->h[4]=U64(0x67332667ffc00b31);
82	c->h[5]=U64(0x8eb44a8768581511);
83	c->h[6]=U64(0xdb0c2e0d64f98fa7);
84	c->h[7]=U64(0x47b5481dbefa4fa4);
85#endif
86        c->Nl=0;        c->Nh=0;
87        c->num=0;       c->md_len=SHA384_DIGEST_LENGTH;
88        return 1;
89	}
90
91int SHA512_Init (SHA512_CTX *c)
92	{
93#if defined(SHA512_ASM) && (defined(__arm__) || defined(__arm))
94	/* maintain dword order required by assembler module */
95	unsigned int *h = (unsigned int *)c->h;
96
97	h[0]  = 0x6a09e667; h[1]  = 0xf3bcc908;
98	h[2]  = 0xbb67ae85; h[3]  = 0x84caa73b;
99	h[4]  = 0x3c6ef372; h[5]  = 0xfe94f82b;
100	h[6]  = 0xa54ff53a; h[7]  = 0x5f1d36f1;
101	h[8]  = 0x510e527f; h[9]  = 0xade682d1;
102	h[10] = 0x9b05688c; h[11] = 0x2b3e6c1f;
103	h[12] = 0x1f83d9ab; h[13] = 0xfb41bd6b;
104	h[14] = 0x5be0cd19; h[15] = 0x137e2179;
105#else
106	c->h[0]=U64(0x6a09e667f3bcc908);
107	c->h[1]=U64(0xbb67ae8584caa73b);
108	c->h[2]=U64(0x3c6ef372fe94f82b);
109	c->h[3]=U64(0xa54ff53a5f1d36f1);
110	c->h[4]=U64(0x510e527fade682d1);
111	c->h[5]=U64(0x9b05688c2b3e6c1f);
112	c->h[6]=U64(0x1f83d9abfb41bd6b);
113	c->h[7]=U64(0x5be0cd19137e2179);
114#endif
115        c->Nl=0;        c->Nh=0;
116        c->num=0;       c->md_len=SHA512_DIGEST_LENGTH;
117        return 1;
118	}
119
120#ifndef SHA512_ASM
121static
122#endif
123void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num);
124
125int SHA512_Final (unsigned char *md, SHA512_CTX *c)
126	{
127	unsigned char *p=(unsigned char *)c->u.p;
128	size_t n=c->num;
129
130	p[n]=0x80;	/* There always is a room for one */
131	n++;
132	if (n > (sizeof(c->u)-16))
133		memset (p+n,0,sizeof(c->u)-n), n=0,
134		sha512_block_data_order (c,p,1);
135
136	memset (p+n,0,sizeof(c->u)-16-n);
137#ifdef	B_ENDIAN
138	c->u.d[SHA_LBLOCK-2] = c->Nh;
139	c->u.d[SHA_LBLOCK-1] = c->Nl;
140#else
141	p[sizeof(c->u)-1]  = (unsigned char)(c->Nl);
142	p[sizeof(c->u)-2]  = (unsigned char)(c->Nl>>8);
143	p[sizeof(c->u)-3]  = (unsigned char)(c->Nl>>16);
144	p[sizeof(c->u)-4]  = (unsigned char)(c->Nl>>24);
145	p[sizeof(c->u)-5]  = (unsigned char)(c->Nl>>32);
146	p[sizeof(c->u)-6]  = (unsigned char)(c->Nl>>40);
147	p[sizeof(c->u)-7]  = (unsigned char)(c->Nl>>48);
148	p[sizeof(c->u)-8]  = (unsigned char)(c->Nl>>56);
149	p[sizeof(c->u)-9]  = (unsigned char)(c->Nh);
150	p[sizeof(c->u)-10] = (unsigned char)(c->Nh>>8);
151	p[sizeof(c->u)-11] = (unsigned char)(c->Nh>>16);
152	p[sizeof(c->u)-12] = (unsigned char)(c->Nh>>24);
153	p[sizeof(c->u)-13] = (unsigned char)(c->Nh>>32);
154	p[sizeof(c->u)-14] = (unsigned char)(c->Nh>>40);
155	p[sizeof(c->u)-15] = (unsigned char)(c->Nh>>48);
156	p[sizeof(c->u)-16] = (unsigned char)(c->Nh>>56);
157#endif
158
159	sha512_block_data_order (c,p,1);
160
161	if (md==0) return 0;
162
163#if defined(SHA512_ASM) && (defined(__arm__) || defined(__arm))
164	/* recall assembler dword order... */
165	n = c->md_len;
166	if (n == SHA384_DIGEST_LENGTH || n == SHA512_DIGEST_LENGTH)
167		{
168		unsigned int *h = (unsigned int *)c->h, t;
169
170		for (n/=4;n;n--)
171			{
172			t = *(h++);
173			*(md++) = (unsigned char)(t>>24);
174			*(md++) = (unsigned char)(t>>16);
175			*(md++) = (unsigned char)(t>>8);
176			*(md++) = (unsigned char)(t);
177			}
178		}
179	else	return 0;
180#else
181	switch (c->md_len)
182		{
183		/* Let compiler decide if it's appropriate to unroll... */
184		case SHA384_DIGEST_LENGTH:
185			for (n=0;n<SHA384_DIGEST_LENGTH/8;n++)
186				{
187				SHA_LONG64 t = c->h[n];
188
189				*(md++)	= (unsigned char)(t>>56);
190				*(md++)	= (unsigned char)(t>>48);
191				*(md++)	= (unsigned char)(t>>40);
192				*(md++)	= (unsigned char)(t>>32);
193				*(md++)	= (unsigned char)(t>>24);
194				*(md++)	= (unsigned char)(t>>16);
195				*(md++)	= (unsigned char)(t>>8);
196				*(md++)	= (unsigned char)(t);
197				}
198			break;
199		case SHA512_DIGEST_LENGTH:
200			for (n=0;n<SHA512_DIGEST_LENGTH/8;n++)
201				{
202				SHA_LONG64 t = c->h[n];
203
204				*(md++)	= (unsigned char)(t>>56);
205				*(md++)	= (unsigned char)(t>>48);
206				*(md++)	= (unsigned char)(t>>40);
207				*(md++)	= (unsigned char)(t>>32);
208				*(md++)	= (unsigned char)(t>>24);
209				*(md++)	= (unsigned char)(t>>16);
210				*(md++)	= (unsigned char)(t>>8);
211				*(md++)	= (unsigned char)(t);
212				}
213			break;
214		/* ... as well as make sure md_len is not abused. */
215		default:	return 0;
216		}
217#endif
218	return 1;
219	}
220
221int SHA384_Final (unsigned char *md,SHA512_CTX *c)
222{   return SHA512_Final (md,c);   }
223
224int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len)
225	{
226	SHA_LONG64	l;
227	unsigned char  *p=c->u.p;
228	const unsigned char *data=(const unsigned char *)_data;
229
230	if (len==0) return  1;
231
232	l = (c->Nl+(((SHA_LONG64)len)<<3))&U64(0xffffffffffffffff);
233	if (l < c->Nl)		c->Nh++;
234	if (sizeof(len)>=8)	c->Nh+=(((SHA_LONG64)len)>>61);
235	c->Nl=l;
236
237	if (c->num != 0)
238		{
239		size_t n = sizeof(c->u) - c->num;
240
241		if (len < n)
242			{
243			memcpy (p+c->num,data,len), c->num += (unsigned int)len;
244			return 1;
245			}
246		else	{
247			memcpy (p+c->num,data,n), c->num = 0;
248			len-=n, data+=n;
249			sha512_block_data_order (c,p,1);
250			}
251		}
252
253	if (len >= sizeof(c->u))
254		{
255#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
256		if ((size_t)data%sizeof(c->u.d[0]) != 0)
257			while (len >= sizeof(c->u))
258				memcpy (p,data,sizeof(c->u)),
259				sha512_block_data_order (c,p,1),
260				len  -= sizeof(c->u),
261				data += sizeof(c->u);
262		else
263#endif
264			sha512_block_data_order (c,data,len/sizeof(c->u)),
265			data += len,
266			len  %= sizeof(c->u),
267			data -= len;
268		}
269
270	if (len != 0)	memcpy (p,data,len), c->num = (int)len;
271
272	return 1;
273	}
274
275int SHA384_Update (SHA512_CTX *c, const void *data, size_t len)
276{   return SHA512_Update (c,data,len);   }
277
278void SHA512_Transform (SHA512_CTX *c, const unsigned char *data)
279{   sha512_block_data_order (c,data,1);  }
280
281unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
282	{
283	SHA512_CTX c;
284	static unsigned char m[SHA384_DIGEST_LENGTH];
285
286	if (md == NULL) md=m;
287	SHA384_Init(&c);
288	SHA512_Update(&c,d,n);
289	SHA512_Final(md,&c);
290	OPENSSL_cleanse(&c,sizeof(c));
291	return(md);
292	}
293
294unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
295	{
296	SHA512_CTX c;
297	static unsigned char m[SHA512_DIGEST_LENGTH];
298
299	if (md == NULL) md=m;
300	SHA512_Init(&c);
301	SHA512_Update(&c,d,n);
302	SHA512_Final(md,&c);
303	OPENSSL_cleanse(&c,sizeof(c));
304	return(md);
305	}
306
307#ifndef SHA512_ASM
308static const SHA_LONG64 K512[80] = {
309        U64(0x428a2f98d728ae22),U64(0x7137449123ef65cd),
310        U64(0xb5c0fbcfec4d3b2f),U64(0xe9b5dba58189dbbc),
311        U64(0x3956c25bf348b538),U64(0x59f111f1b605d019),
312        U64(0x923f82a4af194f9b),U64(0xab1c5ed5da6d8118),
313        U64(0xd807aa98a3030242),U64(0x12835b0145706fbe),
314        U64(0x243185be4ee4b28c),U64(0x550c7dc3d5ffb4e2),
315        U64(0x72be5d74f27b896f),U64(0x80deb1fe3b1696b1),
316        U64(0x9bdc06a725c71235),U64(0xc19bf174cf692694),
317        U64(0xe49b69c19ef14ad2),U64(0xefbe4786384f25e3),
318        U64(0x0fc19dc68b8cd5b5),U64(0x240ca1cc77ac9c65),
319        U64(0x2de92c6f592b0275),U64(0x4a7484aa6ea6e483),
320        U64(0x5cb0a9dcbd41fbd4),U64(0x76f988da831153b5),
321        U64(0x983e5152ee66dfab),U64(0xa831c66d2db43210),
322        U64(0xb00327c898fb213f),U64(0xbf597fc7beef0ee4),
323        U64(0xc6e00bf33da88fc2),U64(0xd5a79147930aa725),
324        U64(0x06ca6351e003826f),U64(0x142929670a0e6e70),
325        U64(0x27b70a8546d22ffc),U64(0x2e1b21385c26c926),
326        U64(0x4d2c6dfc5ac42aed),U64(0x53380d139d95b3df),
327        U64(0x650a73548baf63de),U64(0x766a0abb3c77b2a8),
328        U64(0x81c2c92e47edaee6),U64(0x92722c851482353b),
329        U64(0xa2bfe8a14cf10364),U64(0xa81a664bbc423001),
330        U64(0xc24b8b70d0f89791),U64(0xc76c51a30654be30),
331        U64(0xd192e819d6ef5218),U64(0xd69906245565a910),
332        U64(0xf40e35855771202a),U64(0x106aa07032bbd1b8),
333        U64(0x19a4c116b8d2d0c8),U64(0x1e376c085141ab53),
334        U64(0x2748774cdf8eeb99),U64(0x34b0bcb5e19b48a8),
335        U64(0x391c0cb3c5c95a63),U64(0x4ed8aa4ae3418acb),
336        U64(0x5b9cca4f7763e373),U64(0x682e6ff3d6b2b8a3),
337        U64(0x748f82ee5defb2fc),U64(0x78a5636f43172f60),
338        U64(0x84c87814a1f0ab72),U64(0x8cc702081a6439ec),
339        U64(0x90befffa23631e28),U64(0xa4506cebde82bde9),
340        U64(0xbef9a3f7b2c67915),U64(0xc67178f2e372532b),
341        U64(0xca273eceea26619c),U64(0xd186b8c721c0c207),
342        U64(0xeada7dd6cde0eb1e),U64(0xf57d4f7fee6ed178),
343        U64(0x06f067aa72176fba),U64(0x0a637dc5a2c898a6),
344        U64(0x113f9804bef90dae),U64(0x1b710b35131c471b),
345        U64(0x28db77f523047d84),U64(0x32caab7b40c72493),
346        U64(0x3c9ebe0a15c9bebc),U64(0x431d67c49c100d4c),
347        U64(0x4cc5d4becb3e42b6),U64(0x597f299cfc657e2a),
348        U64(0x5fcb6fab3ad6faec),U64(0x6c44198c4a475817) };
349
/*
 * Optional platform-specific overrides for ROTR (64-bit rotate right by
 * a constant) and PULL64 (fetch a 64-bit word stored in big-endian byte
 * order). Anything left undefined by this section is supplied by the
 * portable definitions that follow it. Defining PEDANTIC disables every
 * override here.
 */
#ifndef PEDANTIC
# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#  if defined(__x86_64) || defined(__x86_64__)
/* x86_64: rotate with rorq; the count must be an immediate ("J"). */
#   define ROTR(a,n)	({ SHA_LONG64 rot_;			\
			   asm ("rorq %1,%0"			\
				: "=r"(rot_)			\
				: "J"(n),"0"(a)			\
				: "cc");			\
			   rot_; })
#   if !defined(B_ENDIAN)
/* x86_64: load the word as stored, then reverse its bytes with bswapq. */
#    define PULL64(x)	({ SHA_LONG64 be_=*((const SHA_LONG64 *)(&(x)));	\
			   asm ("bswapq	%0"			\
				: "=r"(be_)			\
				: "0"(be_));			\
			   be_; })
#   endif
#  elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
#   if defined(I386_ONLY)
/* i386 without bswap: reverse each 32-bit half with xchgb/roll. */
#    define PULL64(x)	({ const unsigned int *wp_=(const unsigned int *)(&(x));\
			   unsigned int hi_=wp_[0],lo_=wp_[1];	\
			   asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
			       "roll $16,%%eax; roll $16,%%edx; "\
			       "xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
				: "=a"(lo_),"=d"(hi_)		\
				: "0"(lo_),"1"(hi_) : "cc");	\
			   ((SHA_LONG64)hi_)<<32|lo_; })
#   else
/* i386 with bswap: reverse each 32-bit half, then combine. */
#    define PULL64(x)	({ const unsigned int *wp_=(const unsigned int *)(&(x));\
			   unsigned int hi_=wp_[0],lo_=wp_[1];	\
			   asm ("bswapl %0; bswapl %1;"		\
				: "=r"(lo_),"=r"(hi_)		\
				: "0"(lo_),"1"(hi_));		\
			   ((SHA_LONG64)hi_)<<32|lo_; })
#   endif
#  elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
/* 64-bit PowerPC: rotate with rotrdi and an immediate count ("K"). */
#   define ROTR(a,n)	({ SHA_LONG64 rot_;			\
			   asm ("rotrdi %0,%1,%2"		\
				: "=r"(rot_)			\
				: "r"(a),"K"(n));		\
			   rot_; })
#  endif
# elif defined(_MSC_VER)
#  if defined(_WIN64)	/* applies to both IA-64 and AMD64 */
#   pragma intrinsic(_rotr64)
#   define ROTR(a,n)	_rotr64((a),n)
#  endif
#  if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
/*
 * __pull64be: reads the two 32-bit halves at x through ECX (where
 * __fastcall places the first argument), byte-reverses each of them and
 * leaves the result in EDX:EAX; there is deliberately no C return
 * statement.
 */
#   if defined(I386_ONLY)
static SHA_LONG64 __fastcall __pull64be(const void *x)
	{
	_asm	mov	edx, [ecx + 0]
	_asm	mov	eax, [ecx + 4]
	_asm	xchg	dh,dl
	_asm	xchg	ah,al
	_asm	rol	edx,16
	_asm	rol	eax,16
	_asm	xchg	dh,dl
	_asm	xchg	ah,al
	}
#   else
static SHA_LONG64 __fastcall __pull64be(const void *x)
	{
	_asm	mov	edx, [ecx + 0]
	_asm	mov	eax, [ecx + 4]
	_asm	bswap	edx
	_asm	bswap	eax
	}
#   endif
#   define PULL64(x) __pull64be(&(x))
#   if _MSC_VER<=1200
#    pragma inline_depth(0)
#   endif
#  endif
# endif
#endif
420
/*
 * Portable fallbacks, used when no platform-specific version was
 * defined earlier.
 *
 * B(x,j)    - byte j (0 = first in memory) of the object x, moved to its
 *             position in a big-endian 64-bit value.
 * PULL64(x) - the 64-bit big-endian value stored at x, assembled byte by
 *             byte so that neither host byte order nor alignment matters.
 * ROTR(x,s) - rotate the 64-bit value x right by s bits, 0 < s < 64.
 *             x is evaluated twice.
 *
 * Every macro argument is parenthesized so that expression arguments
 * (e.g. ROTR(v,a+b)) expand with the intended precedence.
 */
#ifndef PULL64
#define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
#endif

#ifndef ROTR
#define ROTR(x,s)	(((x)>>(s)) | ((x)<<(64-(s))))
#endif

/* The four SHA-512 mixing functions: upper case acts on the working
 * state, lower case on the message schedule. */
#define Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
#define Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
#define sigma0(x)	(ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
#define sigma1(x)	(ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))

/* Ch: bitwise "x ? y : z".  Maj: bitwise majority of x, y and z. */
#define Ch(x,y,z)	(((x) & (y)) ^ ((~(x)) & (z)))
#define Maj(x,y,z)	(((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
437
438
439#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
440/*
441 * This code should give better results on 32-bit CPU with less than
442 * ~24 registers, both size and performance wise...
443 */
444static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
445	{
446	const SHA_LONG64 *W=in;
447	SHA_LONG64	A,E,T;
448	SHA_LONG64	X[9+80],*F;
449	int i;
450
451			while (num--) {
452
453	F    = X+80;
454	A    = ctx->h[0];	F[1] = ctx->h[1];
455	F[2] = ctx->h[2];	F[3] = ctx->h[3];
456	E    = ctx->h[4];	F[5] = ctx->h[5];
457	F[6] = ctx->h[6];	F[7] = ctx->h[7];
458
459	for (i=0;i<16;i++,F--)
460		{
461#ifdef B_ENDIAN
462		T = W[i];
463#else
464		T = PULL64(W[i]);
465#endif
466		F[0] = A;
467		F[4] = E;
468		F[8] = T;
469		T   += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
470		E    = F[3] + T;
471		A    = T + Sigma0(A) + Maj(A,F[1],F[2]);
472		}
473
474	for (;i<80;i++,F--)
475		{
476		T    = sigma0(F[8+16-1]);
477		T   += sigma1(F[8+16-14]);
478		T   += F[8+16] + F[8+16-9];
479
480		F[0] = A;
481		F[4] = E;
482		F[8] = T;
483		T   += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
484		E    = F[3] + T;
485		A    = T + Sigma0(A) + Maj(A,F[1],F[2]);
486		}
487
488	ctx->h[0] += A;		ctx->h[1] += F[1];
489	ctx->h[2] += F[2];	ctx->h[3] += F[3];
490	ctx->h[4] += E;		ctx->h[5] += F[5];
491	ctx->h[6] += F[6];	ctx->h[7] += F[7];
492
493			W+=SHA_LBLOCK;
494			}
495	}
496
497#elif defined(OPENSSL_SMALL_FOOTPRINT)
498
499static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
500	{
501	const SHA_LONG64 *W=in;
502	SHA_LONG64	a,b,c,d,e,f,g,h,s0,s1,T1,T2;
503	SHA_LONG64	X[16];
504	int i;
505
506			while (num--) {
507
508	a = ctx->h[0];	b = ctx->h[1];	c = ctx->h[2];	d = ctx->h[3];
509	e = ctx->h[4];	f = ctx->h[5];	g = ctx->h[6];	h = ctx->h[7];
510
511	for (i=0;i<16;i++)
512		{
513#ifdef B_ENDIAN
514		T1 = X[i] = W[i];
515#else
516		T1 = X[i] = PULL64(W[i]);
517#endif
518		T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
519		T2 = Sigma0(a) + Maj(a,b,c);
520		h = g;	g = f;	f = e;	e = d + T1;
521		d = c;	c = b;	b = a;	a = T1 + T2;
522		}
523
524	for (;i<80;i++)
525		{
526		s0 = X[(i+1)&0x0f];	s0 = sigma0(s0);
527		s1 = X[(i+14)&0x0f];	s1 = sigma1(s1);
528
529		T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
530		T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
531		T2 = Sigma0(a) + Maj(a,b,c);
532		h = g;	g = f;	f = e;	e = d + T1;
533		d = c;	c = b;	b = a;	a = T1 + T2;
534		}
535
536	ctx->h[0] += a;	ctx->h[1] += b;	ctx->h[2] += c;	ctx->h[3] += d;
537	ctx->h[4] += e;	ctx->h[5] += f;	ctx->h[6] += g;	ctx->h[7] += h;
538
539			W+=SHA_LBLOCK;
540			}
541	}
542
543#else
544
545#define	ROUND_00_15(i,a,b,c,d,e,f,g,h)		do {	\
546	T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];	\
547	h = Sigma0(a) + Maj(a,b,c);			\
548	d += T1;	h += T1;		} while (0)
549
550#define	ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)	do {	\
551	s0 = X[(j+1)&0x0f];	s0 = sigma0(s0);	\
552	s1 = X[(j+14)&0x0f];	s1 = sigma1(s1);	\
553	T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];	\
554	ROUND_00_15(i+j,a,b,c,d,e,f,g,h);		} while (0)
555
556static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
557	{
558	const SHA_LONG64 *W=in;
559	SHA_LONG64	a,b,c,d,e,f,g,h,s0,s1,T1;
560	SHA_LONG64	X[16];
561	int i;
562
563			while (num--) {
564
565	a = ctx->h[0];	b = ctx->h[1];	c = ctx->h[2];	d = ctx->h[3];
566	e = ctx->h[4];	f = ctx->h[5];	g = ctx->h[6];	h = ctx->h[7];
567
568#ifdef B_ENDIAN
569	T1 = X[0] = W[0];	ROUND_00_15(0,a,b,c,d,e,f,g,h);
570	T1 = X[1] = W[1];	ROUND_00_15(1,h,a,b,c,d,e,f,g);
571	T1 = X[2] = W[2];	ROUND_00_15(2,g,h,a,b,c,d,e,f);
572	T1 = X[3] = W[3];	ROUND_00_15(3,f,g,h,a,b,c,d,e);
573	T1 = X[4] = W[4];	ROUND_00_15(4,e,f,g,h,a,b,c,d);
574	T1 = X[5] = W[5];	ROUND_00_15(5,d,e,f,g,h,a,b,c);
575	T1 = X[6] = W[6];	ROUND_00_15(6,c,d,e,f,g,h,a,b);
576	T1 = X[7] = W[7];	ROUND_00_15(7,b,c,d,e,f,g,h,a);
577	T1 = X[8] = W[8];	ROUND_00_15(8,a,b,c,d,e,f,g,h);
578	T1 = X[9] = W[9];	ROUND_00_15(9,h,a,b,c,d,e,f,g);
579	T1 = X[10] = W[10];	ROUND_00_15(10,g,h,a,b,c,d,e,f);
580	T1 = X[11] = W[11];	ROUND_00_15(11,f,g,h,a,b,c,d,e);
581	T1 = X[12] = W[12];	ROUND_00_15(12,e,f,g,h,a,b,c,d);
582	T1 = X[13] = W[13];	ROUND_00_15(13,d,e,f,g,h,a,b,c);
583	T1 = X[14] = W[14];	ROUND_00_15(14,c,d,e,f,g,h,a,b);
584	T1 = X[15] = W[15];	ROUND_00_15(15,b,c,d,e,f,g,h,a);
585#else
586	T1 = X[0]  = PULL64(W[0]);	ROUND_00_15(0,a,b,c,d,e,f,g,h);
587	T1 = X[1]  = PULL64(W[1]);	ROUND_00_15(1,h,a,b,c,d,e,f,g);
588	T1 = X[2]  = PULL64(W[2]);	ROUND_00_15(2,g,h,a,b,c,d,e,f);
589	T1 = X[3]  = PULL64(W[3]);	ROUND_00_15(3,f,g,h,a,b,c,d,e);
590	T1 = X[4]  = PULL64(W[4]);	ROUND_00_15(4,e,f,g,h,a,b,c,d);
591	T1 = X[5]  = PULL64(W[5]);	ROUND_00_15(5,d,e,f,g,h,a,b,c);
592	T1 = X[6]  = PULL64(W[6]);	ROUND_00_15(6,c,d,e,f,g,h,a,b);
593	T1 = X[7]  = PULL64(W[7]);	ROUND_00_15(7,b,c,d,e,f,g,h,a);
594	T1 = X[8]  = PULL64(W[8]);	ROUND_00_15(8,a,b,c,d,e,f,g,h);
595	T1 = X[9]  = PULL64(W[9]);	ROUND_00_15(9,h,a,b,c,d,e,f,g);
596	T1 = X[10] = PULL64(W[10]);	ROUND_00_15(10,g,h,a,b,c,d,e,f);
597	T1 = X[11] = PULL64(W[11]);	ROUND_00_15(11,f,g,h,a,b,c,d,e);
598	T1 = X[12] = PULL64(W[12]);	ROUND_00_15(12,e,f,g,h,a,b,c,d);
599	T1 = X[13] = PULL64(W[13]);	ROUND_00_15(13,d,e,f,g,h,a,b,c);
600	T1 = X[14] = PULL64(W[14]);	ROUND_00_15(14,c,d,e,f,g,h,a,b);
601	T1 = X[15] = PULL64(W[15]);	ROUND_00_15(15,b,c,d,e,f,g,h,a);
602#endif
603
604	for (i=16;i<80;i+=16)
605		{
606		ROUND_16_80(i, 0,a,b,c,d,e,f,g,h,X);
607		ROUND_16_80(i, 1,h,a,b,c,d,e,f,g,X);
608		ROUND_16_80(i, 2,g,h,a,b,c,d,e,f,X);
609		ROUND_16_80(i, 3,f,g,h,a,b,c,d,e,X);
610		ROUND_16_80(i, 4,e,f,g,h,a,b,c,d,X);
611		ROUND_16_80(i, 5,d,e,f,g,h,a,b,c,X);
612		ROUND_16_80(i, 6,c,d,e,f,g,h,a,b,X);
613		ROUND_16_80(i, 7,b,c,d,e,f,g,h,a,X);
614		ROUND_16_80(i, 8,a,b,c,d,e,f,g,h,X);
615		ROUND_16_80(i, 9,h,a,b,c,d,e,f,g,X);
616		ROUND_16_80(i,10,g,h,a,b,c,d,e,f,X);
617		ROUND_16_80(i,11,f,g,h,a,b,c,d,e,X);
618		ROUND_16_80(i,12,e,f,g,h,a,b,c,d,X);
619		ROUND_16_80(i,13,d,e,f,g,h,a,b,c,X);
620		ROUND_16_80(i,14,c,d,e,f,g,h,a,b,X);
621		ROUND_16_80(i,15,b,c,d,e,f,g,h,a,X);
622		}
623
624	ctx->h[0] += a;	ctx->h[1] += b;	ctx->h[2] += c;	ctx->h[3] += d;
625	ctx->h[4] += e;	ctx->h[5] += f;	ctx->h[6] += g;	ctx->h[7] += h;
626
627			W+=SHA_LBLOCK;
628			}
629	}
630
631#endif
632
633#endif /* SHA512_ASM */
634
635#else /* !OPENSSL_NO_SHA512 */
636
/*
 * SHA-512 is compiled out in this branch. For PEDANTIC builds, DEC C
 * and Mac OS X a self-referencing static pointer is still defined --
 * presumably so that the translation unit is not left empty.
 */
#if defined(OPENSSL_SYS_MACOSX) || defined(__DECC) || defined(PEDANTIC)
static void *dummy = &dummy;
#endif
640
641#endif /* !OPENSSL_NO_SHA512 */
642