/* crypto/sha/sha512.c */
/* ====================================================================
 * Copyright (c) 2004 The OpenSSL Project.  All rights reserved
 * according to the OpenSSL license [found in ../../LICENSE].
 * ====================================================================
 */
#include <openssl/opensslconf.h>
#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
/*-
 * IMPLEMENTATION NOTES.
 *
 * As you might have noticed, the 32-bit hash algorithms:
 *
 * - permit SHA_LONG to be wider than 32 bits (the case on CRAY);
 * - have optimized versions that implement two transform functions: one
 *   operating on [aligned] data in host byte order and one on data in
 *   input stream byte order;
 * - share common byte-order neutral collector and padding function
 *   implementations, ../md32_common.h.
 *
 * None of the above applies to this SHA-512 implementation. The reasons
 * [in reverse order] are:
 *
 * - it's the only 64-bit hash algorithm at the moment of this writing,
 *   so there is no need for a common collector/padding implementation
 *   [yet];
 * - by supporting only one transform function [which operates on
 *   *aligned* data in input stream byte order, big-endian in this case]
 *   we minimize the maintenance burden in two ways: a) the
 *   collector/padding function is simpler; b) there is only one
 *   transform function to stare at;
 * - SHA_LONG64 is required to be exactly 64 bits wide so that a number
 *   of optimizations can be applied to mitigate the potential
 *   performance penalties caused by the previous design decision.
 *
 * Caveat lector.
 *
 * The implementation relies on the fact that "long long" is 64 bits wide
 * on both 32- and 64-bit platforms. If some compiler vendor comes up
 * with a 128-bit long long, an adjustment to sha.h would be required.
 * As this implementation relies on a 64-bit integer type, it's totally
 * inappropriate for platforms which don't support one, most notably
 * 16-bit platforms.
 *                                      <appro@fy.chalmers.se>
 */
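/*-
 * A minimal usage sketch (illustration only, not part of this file's code)
 * of the public entry points implemented below, assuming the standard
 * <openssl/sha.h> declarations; it shows how the incremental and one-shot
 * interfaces relate:
 *
 *     unsigned char md[SHA512_DIGEST_LENGTH];
 *     SHA512_CTX ctx;
 *
 *     SHA512_Init(&ctx);              // set FIPS 180-4 initial values
 *     SHA512_Update(&ctx, "abc", 3);  // may be called repeatedly
 *     SHA512_Final(md, &ctx);         // pad, append length, extract digest
 *
 *     // equivalent one-shot call:
 *     SHA512((const unsigned char *)"abc", 3, md);
 */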
# include <stdlib.h>
# include <string.h>

# include <openssl/crypto.h>
# include <openssl/sha.h>
# include <openssl/opensslv.h>

# include "cryptlib.h"

const char SHA512_version[] = "SHA-512" OPENSSL_VERSION_PTEXT;

# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
    defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
    defined(__s390__) || defined(__s390x__) || \
    defined(__aarch64__) || \
    defined(SHA512_ASM)
#  define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
# endif

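/* SHA-384 initial hash values, as specified in FIPS 180-4 */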
fips_md_init_ctx(SHA384, SHA512)
{
    c->h[0] = U64(0xcbbb9d5dc1059ed8);
    c->h[1] = U64(0x629a292a367cd507);
    c->h[2] = U64(0x9159015a3070dd17);
    c->h[3] = U64(0x152fecd8f70e5939);
    c->h[4] = U64(0x67332667ffc00b31);
    c->h[5] = U64(0x8eb44a8768581511);
    c->h[6] = U64(0xdb0c2e0d64f98fa7);
    c->h[7] = U64(0x47b5481dbefa4fa4);

    c->Nl = 0;
    c->Nh = 0;
    c->num = 0;
    c->md_len = SHA384_DIGEST_LENGTH;
    return 1;
}

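/* SHA-512 initial hash values, as specified in FIPS 180-4 */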
fips_md_init(SHA512)
{
    c->h[0] = U64(0x6a09e667f3bcc908);
    c->h[1] = U64(0xbb67ae8584caa73b);
    c->h[2] = U64(0x3c6ef372fe94f82b);
    c->h[3] = U64(0xa54ff53a5f1d36f1);
    c->h[4] = U64(0x510e527fade682d1);
    c->h[5] = U64(0x9b05688c2b3e6c1f);
    c->h[6] = U64(0x1f83d9abfb41bd6b);
    c->h[7] = U64(0x5be0cd19137e2179);

    c->Nl = 0;
    c->Nh = 0;
    c->num = 0;
    c->md_len = SHA512_DIGEST_LENGTH;
    return 1;
}

# ifndef SHA512_ASM
static
# endif
void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);

int SHA512_Final(unsigned char *md, SHA512_CTX *c)
{
    unsigned char *p = (unsigned char *)c->u.p;
    size_t n = c->num;

    p[n] = 0x80;                /* There is always room for one */
    n++;
    if (n > (sizeof(c->u) - 16))
        memset(p + n, 0, sizeof(c->u) - n), n = 0,
            sha512_block_data_order(c, p, 1);

    memset(p + n, 0, sizeof(c->u) - 16 - n);
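    /* Append the 128-bit message length in bits (Nh:Nl), big-endian */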
# ifdef  B_ENDIAN
    c->u.d[SHA_LBLOCK - 2] = c->Nh;
    c->u.d[SHA_LBLOCK - 1] = c->Nl;
# else
    p[sizeof(c->u) - 1] = (unsigned char)(c->Nl);
    p[sizeof(c->u) - 2] = (unsigned char)(c->Nl >> 8);
    p[sizeof(c->u) - 3] = (unsigned char)(c->Nl >> 16);
    p[sizeof(c->u) - 4] = (unsigned char)(c->Nl >> 24);
    p[sizeof(c->u) - 5] = (unsigned char)(c->Nl >> 32);
    p[sizeof(c->u) - 6] = (unsigned char)(c->Nl >> 40);
    p[sizeof(c->u) - 7] = (unsigned char)(c->Nl >> 48);
    p[sizeof(c->u) - 8] = (unsigned char)(c->Nl >> 56);
    p[sizeof(c->u) - 9] = (unsigned char)(c->Nh);
    p[sizeof(c->u) - 10] = (unsigned char)(c->Nh >> 8);
    p[sizeof(c->u) - 11] = (unsigned char)(c->Nh >> 16);
    p[sizeof(c->u) - 12] = (unsigned char)(c->Nh >> 24);
    p[sizeof(c->u) - 13] = (unsigned char)(c->Nh >> 32);
    p[sizeof(c->u) - 14] = (unsigned char)(c->Nh >> 40);
    p[sizeof(c->u) - 15] = (unsigned char)(c->Nh >> 48);
    p[sizeof(c->u) - 16] = (unsigned char)(c->Nh >> 56);
# endif

    sha512_block_data_order(c, p, 1);

    if (md == 0)
        return 0;

    switch (c->md_len) {
        /* Let compiler decide if it's appropriate to unroll... */
    case SHA384_DIGEST_LENGTH:
        for (n = 0; n < SHA384_DIGEST_LENGTH / 8; n++) {
            SHA_LONG64 t = c->h[n];

            *(md++) = (unsigned char)(t >> 56);
            *(md++) = (unsigned char)(t >> 48);
            *(md++) = (unsigned char)(t >> 40);
            *(md++) = (unsigned char)(t >> 32);
            *(md++) = (unsigned char)(t >> 24);
            *(md++) = (unsigned char)(t >> 16);
            *(md++) = (unsigned char)(t >> 8);
            *(md++) = (unsigned char)(t);
        }
        break;
    case SHA512_DIGEST_LENGTH:
        for (n = 0; n < SHA512_DIGEST_LENGTH / 8; n++) {
            SHA_LONG64 t = c->h[n];

            *(md++) = (unsigned char)(t >> 56);
            *(md++) = (unsigned char)(t >> 48);
            *(md++) = (unsigned char)(t >> 40);
            *(md++) = (unsigned char)(t >> 32);
            *(md++) = (unsigned char)(t >> 24);
            *(md++) = (unsigned char)(t >> 16);
            *(md++) = (unsigned char)(t >> 8);
            *(md++) = (unsigned char)(t);
        }
        break;
        /* ... as well as make sure md_len is not abused. */
    default:
        return 0;
    }

    return 1;
}

int SHA384_Final(unsigned char *md, SHA512_CTX *c)
{
    return SHA512_Final(md, c);
}

int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
{
    SHA_LONG64 l;
    unsigned char *p = c->u.p;
    const unsigned char *data = (const unsigned char *)_data;

    if (len == 0)
        return 1;

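    /* Update the 128-bit bit counter: Nh:Nl = total message length in bits */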
    l = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
    if (l < c->Nl)
        c->Nh++;
    if (sizeof(len) >= 8)
        c->Nh += (((SHA_LONG64) len) >> 61);
    c->Nl = l;

    if (c->num != 0) {
        size_t n = sizeof(c->u) - c->num;

        if (len < n) {
            memcpy(p + c->num, data, len), c->num += (unsigned int)len;
            return 1;
        } else {
            memcpy(p + c->num, data, n), c->num = 0;
            len -= n, data += n;
            sha512_block_data_order(c, p, 1);
        }
    }

    if (len >= sizeof(c->u)) {
# ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
        if ((size_t)data % sizeof(c->u.d[0]) != 0)
            while (len >= sizeof(c->u))
                memcpy(p, data, sizeof(c->u)),
                    sha512_block_data_order(c, p, 1),
                    len -= sizeof(c->u), data += sizeof(c->u);
        else
# endif
            sha512_block_data_order(c, data, len / sizeof(c->u)),
                data += len, len %= sizeof(c->u), data -= len;
    }

    if (len != 0)
        memcpy(p, data, len), c->num = (int)len;

    return 1;
}

int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
{
    return SHA512_Update(c, data, len);
}

void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
{
# ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
    if ((size_t)data % sizeof(c->u.d[0]) != 0)
        memcpy(c->u.p, data, sizeof(c->u.p)), data = c->u.p;
# endif
    sha512_block_data_order(c, data, 1);
}

unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
{
    SHA512_CTX c;
    static unsigned char m[SHA384_DIGEST_LENGTH];

    if (md == NULL)
        md = m;
    SHA384_Init(&c);
    SHA512_Update(&c, d, n);
    SHA512_Final(md, &c);
    OPENSSL_cleanse(&c, sizeof(c));
    return (md);
}

unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
{
    SHA512_CTX c;
    static unsigned char m[SHA512_DIGEST_LENGTH];

    if (md == NULL)
        md = m;
    SHA512_Init(&c);
    SHA512_Update(&c, d, n);
    SHA512_Final(md, &c);
    OPENSSL_cleanse(&c, sizeof(c));
    return (md);
}

# ifndef SHA512_ASM
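/*
 * SHA-512 round constants: the first 64 bits of the fractional parts of
 * the cube roots of the first eighty prime numbers (FIPS 180-4).
 */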
static const SHA_LONG64 K512[80] = {
    U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
    U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
    U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
    U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
    U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
    U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
    U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
    U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
    U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
    U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
    U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
    U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
    U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
    U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
    U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
    U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
    U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
    U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
    U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
    U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
    U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
    U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
    U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
    U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
    U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
    U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
    U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
    U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
    U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
    U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
    U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
    U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
    U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
    U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
    U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
    U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
    U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
    U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
    U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
    U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
};

#  ifndef PEDANTIC
#   if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#    if defined(__x86_64) || defined(__x86_64__)
#     define ROTR(a,n)    ({ SHA_LONG64 ret;              \
                                asm ("rorq %1,%0"       \
                                : "=r"(ret)             \
                                : "J"(n),"0"(a)         \
                                : "cc"); ret;           })
#     if !defined(B_ENDIAN)
#      define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
                                asm ("bswapq    %0"             \
                                : "=r"(ret)                     \
                                : "0"(ret)); ret;               })
#     endif
#    elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
#     if defined(I386_ONLY)
#      define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                         unsigned int hi=p[0],lo=p[1];          \
                                asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
                                    "roll $16,%%eax; roll $16,%%edx; "\
                                    "xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
                                : "=a"(lo),"=d"(hi)             \
                                : "0"(lo),"1"(hi) : "cc");      \
                                ((SHA_LONG64)hi)<<32|lo;        })
#     else
#      define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                         unsigned int hi=p[0],lo=p[1];          \
                                asm ("bswapl %0; bswapl %1;"    \
                                : "=r"(lo),"=r"(hi)             \
                                : "0"(lo),"1"(hi));             \
                                ((SHA_LONG64)hi)<<32|lo;        })
#     endif
#    elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
#     define ROTR(a,n)    ({ SHA_LONG64 ret;              \
                                asm ("rotrdi %0,%1,%2"  \
                                : "=r"(ret)             \
                                : "r"(a),"K"(n)); ret;  })
#    elif defined(__aarch64__)
#     define ROTR(a,n)    ({ SHA_LONG64 ret;              \
                                asm ("ror %0,%1,%2"     \
                                : "=r"(ret)             \
                                : "r"(a),"I"(n)); ret;  })
#     if  defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
        __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
#      define PULL64(x)   ({ SHA_LONG64 ret;                      \
                                asm ("rev       %0,%1"          \
                                : "=r"(ret)                     \
                                : "r"(*((const SHA_LONG64 *)(&(x))))); ret;             })
#     endif
#    endif
#   elif defined(_MSC_VER)
#    if defined(_WIN64)         /* applies to both IA-64 and AMD64 */
#     pragma intrinsic(_rotr64)
#     define ROTR(a,n)    _rotr64((a),n)
#    endif
#    if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#     if defined(I386_ONLY)
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov edx,[ecx + 0]
    _asm mov eax,[ecx + 4]
    _asm xchg dh, dl
    _asm xchg ah, al
    _asm rol edx, 16
    _asm rol eax, 16
    _asm xchg dh, dl
    _asm xchg ah, al
}
#     else
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov edx,[ecx + 0]
    _asm mov eax,[ecx + 4]
    _asm bswap edx
    _asm bswap eax
}
#     endif
#     define PULL64(x) __pull64be(&(x))
#     if _MSC_VER<=1200
#      pragma inline_depth(0)
#     endif
#    endif
#   endif
#  endif
#  ifndef PULL64
#   define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8))
#   define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
#  endif
#  ifndef ROTR
#   define ROTR(x,s)       (((x)>>s) | (x)<<(64-s))
#  endif
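/* SHA-512 logical functions and message schedule sigmas from FIPS 180-4 */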
#  define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
#  define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
#  define sigma0(x)       (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
#  define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
#  define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
#  define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
/*
 * This code should give better results on a 32-bit CPU with fewer than
 * ~24 registers, in terms of both size and performance...
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *W = in;
    SHA_LONG64 A, E, T;
    SHA_LONG64 X[9 + 80], *F;
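    /*
     * F slides backwards through X so that the eight working variables and
     * the expanded message schedule share one buffer.
     */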
    int i;

    while (num--) {

        F = X + 80;
        A = ctx->h[0];
        F[1] = ctx->h[1];
        F[2] = ctx->h[2];
        F[3] = ctx->h[3];
        E = ctx->h[4];
        F[5] = ctx->h[5];
        F[6] = ctx->h[6];
        F[7] = ctx->h[7];

        for (i = 0; i < 16; i++, F--) {
#   ifdef B_ENDIAN
            T = W[i];
#   else
            T = PULL64(W[i]);
#   endif
            F[0] = A;
            F[4] = E;
            F[8] = T;
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
            E = F[3] + T;
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
        }

        for (; i < 80; i++, F--) {
            T = sigma0(F[8 + 16 - 1]);
            T += sigma1(F[8 + 16 - 14]);
            T += F[8 + 16] + F[8 + 16 - 9];

            F[0] = A;
            F[4] = E;
            F[8] = T;
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
            E = F[3] + T;
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
        }

        ctx->h[0] += A;
        ctx->h[1] += F[1];
        ctx->h[2] += F[2];
        ctx->h[3] += F[3];
        ctx->h[4] += E;
        ctx->h[5] += F[5];
        ctx->h[6] += F[6];
        ctx->h[7] += F[7];

        W += SHA_LBLOCK;
    }
}

#  elif defined(OPENSSL_SMALL_FOOTPRINT)
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *W = in;
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1, T2;
    SHA_LONG64 X[16];
    int i;

    while (num--) {

        a = ctx->h[0];
        b = ctx->h[1];
        c = ctx->h[2];
        d = ctx->h[3];
        e = ctx->h[4];
        f = ctx->h[5];
        g = ctx->h[6];
        h = ctx->h[7];

        for (i = 0; i < 16; i++) {
#   ifdef B_ENDIAN
            T1 = X[i] = W[i];
#   else
            T1 = X[i] = PULL64(W[i]);
#   endif
            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
            T2 = Sigma0(a) + Maj(a, b, c);
            h = g;
            g = f;
            f = e;
            e = d + T1;
            d = c;
            c = b;
            b = a;
            a = T1 + T2;
        }

        for (; i < 80; i++) {
            s0 = X[(i + 1) & 0x0f];
            s0 = sigma0(s0);
            s1 = X[(i + 14) & 0x0f];
            s1 = sigma1(s1);

            T1 = X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
            T2 = Sigma0(a) + Maj(a, b, c);
            h = g;
            g = f;
            f = e;
            e = d + T1;
            d = c;
            c = b;
            b = a;
            a = T1 + T2;
        }

        ctx->h[0] += a;
        ctx->h[1] += b;
        ctx->h[2] += c;
        ctx->h[3] += d;
        ctx->h[4] += e;
        ctx->h[5] += f;
        ctx->h[6] += g;
        ctx->h[7] += h;

        W += SHA_LBLOCK;
    }
}

#  else
#   define ROUND_00_15(i,a,b,c,d,e,f,g,h)          do {    \
        T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
        h = Sigma0(a) + Maj(a,b,c);                     \
        d += T1;        h += T1;                } while (0)
#   define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)      do {    \
        s0 = X[(j+1)&0x0f];     s0 = sigma0(s0);        \
        s1 = X[(j+14)&0x0f];    s1 = sigma1(s1);        \
        T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];    \
        ROUND_00_15(i+j,a,b,c,d,e,f,g,h);               } while (0)
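/*
 * The default [unrolled] variant keeps the message schedule in the 16-word
 * circular buffer X[], updated in place by ROUND_16_80.
 */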
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *W = in;
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
    SHA_LONG64 X[16];
    int i;

    while (num--) {

        a = ctx->h[0];
        b = ctx->h[1];
        c = ctx->h[2];
        d = ctx->h[3];
        e = ctx->h[4];
        f = ctx->h[5];
        g = ctx->h[6];
        h = ctx->h[7];

#   ifdef B_ENDIAN
        T1 = X[0] = W[0];
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
        T1 = X[1] = W[1];
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
        T1 = X[2] = W[2];
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
        T1 = X[3] = W[3];
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
        T1 = X[4] = W[4];
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
        T1 = X[5] = W[5];
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
        T1 = X[6] = W[6];
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
        T1 = X[7] = W[7];
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
        T1 = X[8] = W[8];
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
        T1 = X[9] = W[9];
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
        T1 = X[10] = W[10];
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
        T1 = X[11] = W[11];
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
        T1 = X[12] = W[12];
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
        T1 = X[13] = W[13];
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
        T1 = X[14] = W[14];
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
        T1 = X[15] = W[15];
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
#   else
        T1 = X[0] = PULL64(W[0]);
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
        T1 = X[1] = PULL64(W[1]);
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
        T1 = X[2] = PULL64(W[2]);
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
        T1 = X[3] = PULL64(W[3]);
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
        T1 = X[4] = PULL64(W[4]);
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
        T1 = X[5] = PULL64(W[5]);
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
        T1 = X[6] = PULL64(W[6]);
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
        T1 = X[7] = PULL64(W[7]);
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
        T1 = X[8] = PULL64(W[8]);
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
        T1 = X[9] = PULL64(W[9]);
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
        T1 = X[10] = PULL64(W[10]);
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
        T1 = X[11] = PULL64(W[11]);
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
        T1 = X[12] = PULL64(W[12]);
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
        T1 = X[13] = PULL64(W[13]);
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
        T1 = X[14] = PULL64(W[14]);
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
        T1 = X[15] = PULL64(W[15]);
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
#   endif

        for (i = 16; i < 80; i += 16) {
            ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
            ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
            ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
            ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
            ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
            ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
            ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
            ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
            ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
            ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
            ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
            ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
            ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
            ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
            ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
            ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
        }

        ctx->h[0] += a;
        ctx->h[1] += b;
        ctx->h[2] += c;
        ctx->h[3] += d;
        ctx->h[4] += e;
        ctx->h[5] += f;
        ctx->h[6] += g;
        ctx->h[7] += h;

        W += SHA_LBLOCK;
    }
}

#  endif

# endif                         /* SHA512_ASM */

#else                           /* !OPENSSL_NO_SHA512 */

# if defined(PEDANTIC) || defined(__DECC) || defined(OPENSSL_SYS_MACOSX)
static void *dummy = &dummy;
# endif

#endif                          /* !OPENSSL_NO_SHA512 */
