/* sha512.c revision 1.21 */
1/* $OpenBSD: sha512.c,v 1.21 2023/03/27 10:13:08 jsing Exp $ */
2/* ====================================================================
3 * Copyright (c) 1998-2011 The OpenSSL Project.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in
14 *    the documentation and/or other materials provided with the
15 *    distribution.
16 *
17 * 3. All advertising materials mentioning features or use of this
18 *    software must display the following acknowledgment:
19 *    "This product includes software developed by the OpenSSL Project
20 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21 *
22 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23 *    endorse or promote products derived from this software without
24 *    prior written permission. For written permission, please contact
25 *    openssl-core@openssl.org.
26 *
27 * 5. Products derived from this software may not be called "OpenSSL"
28 *    nor may "OpenSSL" appear in their names without prior written
29 *    permission of the OpenSSL Project.
30 *
31 * 6. Redistributions of any form whatsoever must retain the following
32 *    acknowledgment:
33 *    "This product includes software developed by the OpenSSL Project
34 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35 *
36 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
40 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 * ====================================================================
49 *
50 * This product includes cryptographic software written by Eric Young
51 * (eay@cryptsoft.com).  This product includes software written by Tim
52 * Hudson (tjh@cryptsoft.com).
53 */
54
55#include <endian.h>
56#include <stdlib.h>
57#include <string.h>
58
59#include <openssl/opensslconf.h>
60
61#include <openssl/crypto.h>
62#include <openssl/sha.h>
63
64#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
65/*
66 * IMPLEMENTATION NOTES.
67 *
68 * As you might have noticed 32-bit hash algorithms:
69 *
70 * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
71 * - optimized versions implement two transform functions: one operating
72 *   on [aligned] data in host byte order and one - on data in input
73 *   stream byte order;
74 * - share common byte-order neutral collector and padding function
75 *   implementations, ../md32_common.h;
76 *
77 * Neither of the above applies to this SHA-512 implementations. Reasons
78 * [in reverse order] are:
79 *
80 * - it's the only 64-bit hash algorithm for the moment of this writing,
81 *   there is no need for common collector/padding implementation [yet];
82 * - by supporting only one transform function [which operates on
83 *   *aligned* data in input stream byte order, big-endian in this case]
84 *   we minimize burden of maintenance in two ways: a) collector/padding
85 *   function is simpler; b) only one transform function to stare at;
86 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
87 *   apply a number of optimizations to mitigate potential performance
88 *   penalties caused by previous design decision;
89 *
90 * Caveat lector.
91 *
92 * Implementation relies on the fact that "long long" is 64-bit on
93 * both 32- and 64-bit platforms. If some compiler vendor comes up
94 * with 128-bit long long, adjustment to sha.h would be required.
95 * As this implementation relies on 64-bit integer type, it's totally
96 * inappropriate for platforms which don't support it, most notably
97 * 16-bit platforms.
98 *					<appro@fy.chalmers.se>
99 */
100
101#if !defined(__STRICT_ALIGNMENT) || defined(SHA512_ASM)
102#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
103#endif
104
105int
106SHA384_Init(SHA512_CTX *c)
107{
108	c->h[0] = U64(0xcbbb9d5dc1059ed8);
109	c->h[1] = U64(0x629a292a367cd507);
110	c->h[2] = U64(0x9159015a3070dd17);
111	c->h[3] = U64(0x152fecd8f70e5939);
112	c->h[4] = U64(0x67332667ffc00b31);
113	c->h[5] = U64(0x8eb44a8768581511);
114	c->h[6] = U64(0xdb0c2e0d64f98fa7);
115	c->h[7] = U64(0x47b5481dbefa4fa4);
116
117	c->Nl = 0;
118	c->Nh = 0;
119	c->num = 0;
120	c->md_len = SHA384_DIGEST_LENGTH;
121	return 1;
122}
123
124int
125SHA512_Init(SHA512_CTX *c)
126{
127	c->h[0] = U64(0x6a09e667f3bcc908);
128	c->h[1] = U64(0xbb67ae8584caa73b);
129	c->h[2] = U64(0x3c6ef372fe94f82b);
130	c->h[3] = U64(0xa54ff53a5f1d36f1);
131	c->h[4] = U64(0x510e527fade682d1);
132	c->h[5] = U64(0x9b05688c2b3e6c1f);
133	c->h[6] = U64(0x1f83d9abfb41bd6b);
134	c->h[7] = U64(0x5be0cd19137e2179);
135
136	c->Nl = 0;
137	c->Nh = 0;
138	c->num = 0;
139	c->md_len = SHA512_DIGEST_LENGTH;
140	return 1;
141}
142
143#ifndef SHA512_ASM
144static
145#endif
146void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num);
147
148int
149SHA512_Final(unsigned char *md, SHA512_CTX *c)
150{
151	unsigned char *p = (unsigned char *)c->u.p;
152	size_t n = c->num;
153
154	p[n]=0x80;	/* There always is a room for one */
155	n++;
156	if (n > (sizeof(c->u) - 16)) {
157		memset(p + n, 0, sizeof(c->u) - n);
158		n = 0;
159		sha512_block_data_order(c, p, 1);
160	}
161
162	memset (p + n, 0, sizeof(c->u) - 16 - n);
163#if BYTE_ORDER == BIG_ENDIAN
164	c->u.d[SHA_LBLOCK - 2] = c->Nh;
165	c->u.d[SHA_LBLOCK - 1] = c->Nl;
166#else
167	p[sizeof(c->u) - 1] = (unsigned char)(c->Nl);
168	p[sizeof(c->u) - 2] = (unsigned char)(c->Nl >> 8);
169	p[sizeof(c->u) - 3] = (unsigned char)(c->Nl >> 16);
170	p[sizeof(c->u) - 4] = (unsigned char)(c->Nl >> 24);
171	p[sizeof(c->u) - 5] = (unsigned char)(c->Nl >> 32);
172	p[sizeof(c->u) - 6] = (unsigned char)(c->Nl >> 40);
173	p[sizeof(c->u) - 7] = (unsigned char)(c->Nl >> 48);
174	p[sizeof(c->u) - 8] = (unsigned char)(c->Nl >> 56);
175	p[sizeof(c->u) - 9] = (unsigned char)(c->Nh);
176	p[sizeof(c->u) - 10] = (unsigned char)(c->Nh >> 8);
177	p[sizeof(c->u) - 11] = (unsigned char)(c->Nh >> 16);
178	p[sizeof(c->u) - 12] = (unsigned char)(c->Nh >> 24);
179	p[sizeof(c->u) - 13] = (unsigned char)(c->Nh >> 32);
180	p[sizeof(c->u) - 14] = (unsigned char)(c->Nh >> 40);
181	p[sizeof(c->u) - 15] = (unsigned char)(c->Nh >> 48);
182	p[sizeof(c->u) - 16] = (unsigned char)(c->Nh >> 56);
183#endif
184
185	sha512_block_data_order(c, p, 1);
186
187	if (md == 0)
188		return 0;
189
190	switch (c->md_len) {
191		/* Let compiler decide if it's appropriate to unroll... */
192	case SHA384_DIGEST_LENGTH:
193		for (n = 0; n < SHA384_DIGEST_LENGTH/8; n++) {
194			SHA_LONG64 t = c->h[n];
195
196			*(md++) = (unsigned char)(t >> 56);
197			*(md++) = (unsigned char)(t >> 48);
198			*(md++) = (unsigned char)(t >> 40);
199			*(md++) = (unsigned char)(t >> 32);
200			*(md++) = (unsigned char)(t >> 24);
201			*(md++) = (unsigned char)(t >> 16);
202			*(md++) = (unsigned char)(t >> 8);
203			*(md++) = (unsigned char)(t);
204		}
205		break;
206	case SHA512_DIGEST_LENGTH:
207		for (n = 0; n < SHA512_DIGEST_LENGTH/8; n++) {
208			SHA_LONG64 t = c->h[n];
209
210			*(md++) = (unsigned char)(t >> 56);
211			*(md++) = (unsigned char)(t >> 48);
212			*(md++) = (unsigned char)(t >> 40);
213			*(md++) = (unsigned char)(t >> 32);
214			*(md++) = (unsigned char)(t >> 24);
215			*(md++) = (unsigned char)(t >> 16);
216			*(md++) = (unsigned char)(t >> 8);
217			*(md++) = (unsigned char)(t);
218		}
219		break;
220		/* ... as well as make sure md_len is not abused. */
221	default:
222		return 0;
223	}
224
225	return 1;
226}
227
/*
 * SHA-384 shares the SHA-512 finalisation; c->md_len (set by
 * SHA384_Init) selects the 48-byte output width.
 */
int
SHA384_Final(unsigned char *md, SHA512_CTX *c)
{
	return SHA512_Final(md, c);
}
233
/*
 * Absorb len bytes of input into the SHA-512 state, buffering any
 * partial block in c->u.p and hashing complete 128-byte blocks as
 * they become available.  Always returns 1.
 */
int
SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
{
	SHA_LONG64	l;
	unsigned char  *p = c->u.p;
	const unsigned char *data = (const unsigned char *)_data;

	if (len == 0)
		return 1;

	/* Advance the 128-bit message bit counter Nh:Nl, with carry. */
	l = (c->Nl + (((SHA_LONG64)len) << 3))&U64(0xffffffffffffffff);
	if (l < c->Nl)
		c->Nh++;
	if (sizeof(len) >= 8)
		c->Nh += (((SHA_LONG64)len) >> 61);	/* bits of len shifted out by << 3 */
	c->Nl = l;

	/* Top up a partial block left over from a previous call. */
	if (c->num != 0) {
		size_t n = sizeof(c->u) - c->num;

		if (len < n) {
			/* Still not a full block; just buffer and return. */
			memcpy(p + c->num, data, len);
			c->num += (unsigned int)len;
			return 1;
		} else{
			/* Complete the buffered block and hash it. */
			memcpy(p + c->num, data, n);
			c->num = 0;
			len -= n;
			data += n;
			sha512_block_data_order(c, p, 1);
		}
	}

	/* Hash whole blocks straight from the caller's buffer. */
	if (len >= sizeof(c->u)) {
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
		/* Misaligned input: bounce each block through c->u.p. */
		if ((size_t)data % sizeof(c->u.d[0]) != 0) {
			while (len >= sizeof(c->u)) {
				memcpy(p, data, sizeof(c->u));
				sha512_block_data_order(c, p, 1);
				len -= sizeof(c->u);
				data += sizeof(c->u);
			}
		} else
#endif
		{
			sha512_block_data_order(c, data, len/sizeof(c->u));
			data += len;
			len %= sizeof(c->u);
			data -= len;	/* rewind to the first leftover byte */
		}
	}

	/* Stash the trailing partial block for the next call. */
	if (len != 0) {
		memcpy(p, data, len);
		c->num = (int)len;
	}

	return 1;
}
293
/*
 * SHA-384 uses the same compression function and collector as
 * SHA-512; only the IV and output length differ.
 */
int
SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
{
	return SHA512_Update(c, data, len);
}
299
300void
301SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
302{
303#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
304	if ((size_t)data % sizeof(c->u.d[0]) != 0)
305		memcpy(c->u.p, data, sizeof(c->u.p)),
306		    data = c->u.p;
307#endif
308	sha512_block_data_order(c, data, 1);
309}
310
311unsigned char *
312SHA384(const unsigned char *d, size_t n, unsigned char *md)
313{
314	SHA512_CTX c;
315	static unsigned char m[SHA384_DIGEST_LENGTH];
316
317	if (md == NULL)
318		md = m;
319
320	SHA384_Init(&c);
321	SHA512_Update(&c, d, n);
322	SHA512_Final(md, &c);
323
324	explicit_bzero(&c, sizeof(c));
325
326	return (md);
327}
328
329unsigned char *
330SHA512(const unsigned char *d, size_t n, unsigned char *md)
331{
332	SHA512_CTX c;
333	static unsigned char m[SHA512_DIGEST_LENGTH];
334
335	if (md == NULL)
336		md = m;
337
338	SHA512_Init(&c);
339	SHA512_Update(&c, d, n);
340	SHA512_Final(md, &c);
341
342	explicit_bzero(&c, sizeof(c));
343
344	return (md);
345}
346
347#ifndef SHA512_ASM
/* SHA-384/512 round constants (FIPS 180-4, section 4.2.3). */
static const SHA_LONG64 K512[80] = {
	U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
	U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
	U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
	U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
	U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
	U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
	U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
	U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
	U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
	U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
	U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
	U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
	U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
	U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
	U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
	U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
	U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
	U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
	U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
	U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
	U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
	U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
	U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
	U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
	U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
	U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
	U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
	U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
	U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
	U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
	U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
	U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
	U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
	U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
	U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
	U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
	U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
	U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
	U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
	U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817),
};
390
/*
 * Inline-assembly fast paths for 64-bit rotate (ROTR) and big-endian
 * 64-bit load (PULL64) on GCC-compatible compilers.  Portable C
 * fallbacks below are used when these are not defined.
 */
#if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
# if defined(__x86_64) || defined(__x86_64__)
/* x86-64: rotate via rorq, byte-swapping load via bswapq. */
#  define ROTR(a,n)	({ SHA_LONG64 ret;		\
				asm ("rorq %1,%0"	\
				: "=r"(ret)		\
				: "J"(n),"0"(a)		\
				: "cc"); ret;		})
#   define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));	\
				asm ("bswapq	%0"		\
				: "=r"(ret)			\
				: "0"(ret)); ret;		})
# elif (defined(__i386) || defined(__i386__))
/* i386: swap each 32-bit half, then recombine into a 64-bit value. */
#   define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
			 unsigned int hi=p[0],lo=p[1];		\
				asm ("bswapl %0; bswapl %1;"	\
				: "=r"(lo),"=r"(hi)		\
				: "0"(lo),"1"(hi));		\
				((SHA_LONG64)hi)<<32|lo;	})
# elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
/* 64-bit PowerPC: native rotate-right-doubleword instruction. */
#  define ROTR(a,n)	({ SHA_LONG64 ret;		\
				asm ("rotrdi %0,%1,%2"	\
				: "=r"(ret)		\
				: "r"(a),"K"(n)); ret;	})
# endif
#endif
416
#ifndef PULL64
/* Portable big-endian load of a 64-bit word, assembled byte by byte. */
#define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8))
#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
#endif

#ifndef ROTR
/*
 * Portable 64-bit rotate right.  Both macro arguments are fully
 * parenthesized: the previous definition expanded `s' unparenthesized
 * in `(x)>>s' and `64-s', which silently miscompiles for any argument
 * containing an operator of lower precedence (e.g. `a ? b : c').
 * s must be in [1,63].
 */
#define ROTR(x,s)	(((x)>>(s)) | ((x)<<(64-(s))))
#endif

/* SHA-512 logical functions (FIPS 180-4, section 4.1.3). */
#define Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
#define Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
#define sigma0(x)	(ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
#define sigma1(x)	(ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))

#define Ch(x,y,z)	(((x) & (y)) ^ ((~(x)) & (z)))
#define Maj(x,y,z)	(((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
433
434
435#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
/*
 * This code should give better results on 32-bit CPU with less than
 * ~24 registers, both size and performance wise...
 */
/*
 * Process num 128-byte big-endian blocks at in.  Rather than eight
 * rotating working variables, the state lives in a sliding window F
 * over X[]: decrementing F each round renames b..d and f..h
 * implicitly, so only A and E need explicit updates.  The window also
 * doubles as the message schedule (written at F[8]).
 */
static void
sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num)
{
	const SHA_LONG64 *W = in;
	SHA_LONG64	A, E, T;
	SHA_LONG64	X[9 + 80], *F;
	int i;

	while (num--) {

		/* Load the chaining value; b..d, f..h live in the window. */
		F = X + 80;
		A = ctx->h[0];
		F[1] = ctx->h[1];
		F[2] = ctx->h[2];
		F[3] = ctx->h[3];
		E = ctx->h[4];
		F[5] = ctx->h[5];
		F[6] = ctx->h[6];
		F[7] = ctx->h[7];

		/* Rounds 0-15: schedule comes straight from the input. */
		for (i = 0; i < 16; i++, F--) {
			T = PULL64(W[i]);
			F[0] = A;
			F[4] = E;
			F[8] = T;
			T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
			E = F[3] + T;
			A = T + Sigma0(A) + Maj(A, F[1], F[2]);
		}

		/* Rounds 16-79: extend the schedule from earlier words
		 * (F[8+16-k] is W[i-k] in the shifted window). */
		for (; i < 80; i++, F--) {
			T = sigma0(F[8 + 16 - 1]);
			T += sigma1(F[8 + 16 - 14]);
			T += F[8 + 16] + F[8 + 16 - 9];

			F[0] = A;
			F[4] = E;
			F[8] = T;
			T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
			E = F[3] + T;
			A = T + Sigma0(A) + Maj(A, F[1], F[2]);
		}

		/* Feed-forward into the chaining value. */
		ctx->h[0] += A;
		ctx->h[1] += F[1];
		ctx->h[2] += F[2];
		ctx->h[3] += F[3];
		ctx->h[4] += E;
		ctx->h[5] += F[5];
		ctx->h[6] += F[6];
		ctx->h[7] += F[7];

		W += SHA_LBLOCK;
	}
}
495
496#elif defined(OPENSSL_SMALL_FOOTPRINT)
497
/*
 * Small-footprint SHA-512 compression function: process num 128-byte
 * big-endian blocks at in, using rolled loops and a 16-word circular
 * message schedule X[].
 */
static void
sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num)
{
	const SHA_LONG64 *W = in;
	SHA_LONG64	a, b,c, d,e, f,g, h,s0, s1, T1, T2;
	SHA_LONG64	X[16];
	int i;

	while (num--) {

		/* Load the chaining value into the working variables. */
		a = ctx->h[0];
		b = ctx->h[1];
		c = ctx->h[2];
		d = ctx->h[3];
		e = ctx->h[4];
		f = ctx->h[5];
		g = ctx->h[6];
		h = ctx->h[7];

		/* Rounds 0-15: schedule comes straight from the input. */
		for (i = 0; i < 16; i++) {
#if BYTE_ORDER == BIG_ENDIAN
			T1 = X[i] = W[i];
#else
			T1 = X[i] = PULL64(W[i]);
#endif
			T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
			T2 = Sigma0(a) + Maj(a, b, c);
			h = g;
			g = f;
			f = e;
			e = d + T1;
			d = c;
			c = b;
			b = a;
			a = T1 + T2;
		}

		/* Rounds 16-79: extend the schedule in the circular X[]. */
		for (; i < 80; i++) {
			s0 = X[(i + 1)&0x0f];
			s0 = sigma0(s0);
			s1 = X[(i + 14)&0x0f];
			s1 = sigma1(s1);

			T1 = X[i&0xf] += s0 + s1 + X[(i + 9)&0xf];
			T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
			T2 = Sigma0(a) + Maj(a, b, c);
			h = g;
			g = f;
			f = e;
			e = d + T1;
			d = c;
			c = b;
			b = a;
			a = T1 + T2;
		}

		/* Feed-forward into the chaining value. */
		ctx->h[0] += a;
		ctx->h[1] += b;
		ctx->h[2] += c;
		ctx->h[3] += d;
		ctx->h[4] += e;
		ctx->h[5] += f;
		ctx->h[6] += g;
		ctx->h[7] += h;

		W += SHA_LBLOCK;
	}
}
566
567#else
568
/* One round for t in [0,15]: the message word is already in T1.
 * Updates d and h in place; callers rotate the variable names. */
#define	ROUND_00_15(i,a,b,c,d,e,f,g,h)		do {	\
	T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];	\
	h = Sigma0(a) + Maj(a,b,c);			\
	d += T1;	h += T1;		} while (0)

/* One round for t in [16,79]: extends the 16-word circular message
 * schedule X in place, then performs the common round step. */
#define	ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)	do {	\
	s0 = X[(j+1)&0x0f];	s0 = sigma0(s0);	\
	s1 = X[(j+14)&0x0f];	s1 = sigma1(s1);	\
	T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];	\
	ROUND_00_15(i+j,a,b,c,d,e,f,g,h);		} while (0)
579
/*
 * Default SHA-512 compression function: process num 128-byte
 * big-endian blocks at in.  Rounds 0-15 are fully unrolled; rounds
 * 16-79 are unrolled 16 at a time, rotating the working-variable
 * names through the ROUND macros instead of shuffling values.
 */
static void
sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num)
{
	const SHA_LONG64 *W = in;
	SHA_LONG64	a, b,c, d,e, f,g, h,s0, s1, T1;
	SHA_LONG64	X[16];
	int i;

	while (num--) {

		/* Load the chaining value into the working variables. */
		a = ctx->h[0];
		b = ctx->h[1];
		c = ctx->h[2];
		d = ctx->h[3];
		e = ctx->h[4];
		f = ctx->h[5];
		g = ctx->h[6];
		h = ctx->h[7];

		/* Rounds 0-15: message words direct from the input
		 * (byte-swapped via PULL64 on little-endian hosts). */
#if BYTE_ORDER == BIG_ENDIAN
		T1 = X[0] = W[0];
		ROUND_00_15(0, a,b, c,d, e,f, g, h);
		T1 = X[1] = W[1];
		ROUND_00_15(1, h,a, b,c, d,e, f, g);
		T1 = X[2] = W[2];
		ROUND_00_15(2, g,h, a,b, c,d, e, f);
		T1 = X[3] = W[3];
		ROUND_00_15(3, f,g, h,a, b,c, d, e);
		T1 = X[4] = W[4];
		ROUND_00_15(4, e,f, g,h, a,b, c, d);
		T1 = X[5] = W[5];
		ROUND_00_15(5, d,e, f,g, h,a, b, c);
		T1 = X[6] = W[6];
		ROUND_00_15(6, c,d, e,f, g,h, a, b);
		T1 = X[7] = W[7];
		ROUND_00_15(7, b,c, d,e, f,g, h, a);
		T1 = X[8] = W[8];
		ROUND_00_15(8, a,b, c,d, e,f, g, h);
		T1 = X[9] = W[9];
		ROUND_00_15(9, h,a, b,c, d,e, f, g);
		T1 = X[10] = W[10];
		ROUND_00_15(10, g,h, a,b, c,d, e, f);
		T1 = X[11] = W[11];
		ROUND_00_15(11, f,g, h,a, b,c, d, e);
		T1 = X[12] = W[12];
		ROUND_00_15(12, e,f, g,h, a,b, c, d);
		T1 = X[13] = W[13];
		ROUND_00_15(13, d,e, f,g, h,a, b, c);
		T1 = X[14] = W[14];
		ROUND_00_15(14, c,d, e,f, g,h, a, b);
		T1 = X[15] = W[15];
		ROUND_00_15(15, b,c, d,e, f,g, h, a);
#else
		T1 = X[0] = PULL64(W[0]);
		ROUND_00_15(0, a,b, c,d, e,f, g, h);
		T1 = X[1] = PULL64(W[1]);
		ROUND_00_15(1, h,a, b,c, d,e, f, g);
		T1 = X[2] = PULL64(W[2]);
		ROUND_00_15(2, g,h, a,b, c,d, e, f);
		T1 = X[3] = PULL64(W[3]);
		ROUND_00_15(3, f,g, h,a, b,c, d, e);
		T1 = X[4] = PULL64(W[4]);
		ROUND_00_15(4, e,f, g,h, a,b, c, d);
		T1 = X[5] = PULL64(W[5]);
		ROUND_00_15(5, d,e, f,g, h,a, b, c);
		T1 = X[6] = PULL64(W[6]);
		ROUND_00_15(6, c,d, e,f, g,h, a, b);
		T1 = X[7] = PULL64(W[7]);
		ROUND_00_15(7, b,c, d,e, f,g, h, a);
		T1 = X[8] = PULL64(W[8]);
		ROUND_00_15(8, a,b, c,d, e,f, g, h);
		T1 = X[9] = PULL64(W[9]);
		ROUND_00_15(9, h,a, b,c, d,e, f, g);
		T1 = X[10] = PULL64(W[10]);
		ROUND_00_15(10, g,h, a,b, c,d, e, f);
		T1 = X[11] = PULL64(W[11]);
		ROUND_00_15(11, f,g, h,a, b,c, d, e);
		T1 = X[12] = PULL64(W[12]);
		ROUND_00_15(12, e,f, g,h, a,b, c, d);
		T1 = X[13] = PULL64(W[13]);
		ROUND_00_15(13, d,e, f,g, h,a, b, c);
		T1 = X[14] = PULL64(W[14]);
		ROUND_00_15(14, c,d, e,f, g,h, a, b);
		T1 = X[15] = PULL64(W[15]);
		ROUND_00_15(15, b,c, d,e, f,g, h, a);
#endif

		/* Rounds 16-79, sixteen per iteration; the rotated
		 * argument lists rename the working variables. */
		for (i = 16; i < 80; i += 16) {
			ROUND_16_80(i, 0, a,b, c,d, e,f, g,h, X);
			ROUND_16_80(i, 1, h,a, b,c, d,e, f,g, X);
			ROUND_16_80(i, 2, g,h, a,b, c,d, e,f, X);
			ROUND_16_80(i, 3, f,g, h,a, b,c, d,e, X);
			ROUND_16_80(i, 4, e,f, g,h, a,b, c,d, X);
			ROUND_16_80(i, 5, d,e, f,g, h,a, b,c, X);
			ROUND_16_80(i, 6, c,d, e,f, g,h, a,b, X);
			ROUND_16_80(i, 7, b,c, d,e, f,g, h,a, X);
			ROUND_16_80(i, 8, a,b, c,d, e,f, g,h, X);
			ROUND_16_80(i, 9, h,a, b,c, d,e, f,g, X);
			ROUND_16_80(i, 10, g,h, a,b, c,d, e,f, X);
			ROUND_16_80(i, 11, f,g, h,a, b,c, d,e, X);
			ROUND_16_80(i, 12, e,f, g,h, a,b, c,d, X);
			ROUND_16_80(i, 13, d,e, f,g, h,a, b,c, X);
			ROUND_16_80(i, 14, c,d, e,f, g,h, a,b, X);
			ROUND_16_80(i, 15, b,c, d,e, f,g, h,a, X);
		}

		/* Feed-forward into the chaining value. */
		ctx->h[0] += a;
		ctx->h[1] += b;
		ctx->h[2] += c;
		ctx->h[3] += d;
		ctx->h[4] += e;
		ctx->h[5] += f;
		ctx->h[6] += g;
		ctx->h[7] += h;

		W += SHA_LBLOCK;
	}
}
698
699#endif
700
701#endif /* SHA512_ASM */
702
703#endif /* !OPENSSL_NO_SHA512 */
704