sha512.c revision 296341
1/* crypto/sha/sha512.c */
2/* ====================================================================
3 * Copyright (c) 2004 The OpenSSL Project.  All rights reserved
4 * according to the OpenSSL license [found in ../../LICENSE].
5 * ====================================================================
6 */
7#include <openssl/opensslconf.h>
8#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
9/*-
10 * IMPLEMENTATION NOTES.
11 *
12 * As you might have noticed 32-bit hash algorithms:
13 *
14 * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
15 * - optimized versions implement two transform functions: one operating
16 *   on [aligned] data in host byte order and one - on data in input
17 *   stream byte order;
18 * - share common byte-order neutral collector and padding function
19 *   implementations, ../md32_common.h;
20 *
21 * None of the above applies to this SHA-512 implementation. Reasons
22 * [in reverse order] are:
23 *
24 * - it's the only 64-bit hash algorithm at the time of this writing,
25 *   so there is no need for a common collector/padding implementation [yet];
26 * - by supporting only one transform function [which operates on
27 *   *aligned* data in input stream byte order, big-endian in this case]
28 *   we minimize burden of maintenance in two ways: a) collector/padding
29 *   function is simpler; b) only one transform function to stare at;
30 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
31 *   apply a number of optimizations to mitigate potential performance
32 *   penalties caused by previous design decision;
33 *
34 * Caveat lector.
35 *
36 * Implementation relies on the fact that "long long" is 64-bit on
37 * both 32- and 64-bit platforms. If some compiler vendor comes up
38 * with 128-bit long long, adjustment to sha.h would be required.
39 * As this implementation relies on 64-bit integer type, it's totally
40 * inappropriate for platforms which don't support it, most notably
41 * 16-bit platforms.
42 *                                      <appro@fy.chalmers.se>
43 */
44# include <stdlib.h>
45# include <string.h>
46
47# include <openssl/crypto.h>
48# include <openssl/sha.h>
49# include <openssl/opensslv.h>
50
51# include "cryptlib.h"
52
/*
 * Version identification string: the literal "SHA-512" immediately followed
 * by the text of OPENSSL_VERSION_PTEXT (a macro defined outside this file,
 * presumably by <openssl/opensslv.h>).
 */
53const char SHA512_version[] = "SHA-512" OPENSSL_VERSION_PTEXT;
54
/*
 * SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA is defined for x86, x86-64 and
 * s390/s390x targets, and whenever SHA512_ASM is defined.  When it is
 * defined, SHA512_Update() and SHA512_Transform() pass caller-supplied
 * pointers straight to sha512_block_data_order(); otherwise they first copy
 * input that is not aligned to sizeof(c->u.d[0]) into the context buffer.
 */
55# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
56    defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
57    defined(__s390__) || defined(__s390x__) || \
58    defined(SHA512_ASM)
59#  define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
60# endif
61
62fips_md_init_ctx(SHA384, SHA512)
63{
64    c->h[0] = U64(0xcbbb9d5dc1059ed8);
65    c->h[1] = U64(0x629a292a367cd507);
66    c->h[2] = U64(0x9159015a3070dd17);
67    c->h[3] = U64(0x152fecd8f70e5939);
68    c->h[4] = U64(0x67332667ffc00b31);
69    c->h[5] = U64(0x8eb44a8768581511);
70    c->h[6] = U64(0xdb0c2e0d64f98fa7);
71    c->h[7] = U64(0x47b5481dbefa4fa4);
72
73    c->Nl = 0;
74    c->Nh = 0;
75    c->num = 0;
76    c->md_len = SHA384_DIGEST_LENGTH;
77    return 1;
78}
79
80fips_md_init(SHA512)
81{
82    c->h[0] = U64(0x6a09e667f3bcc908);
83    c->h[1] = U64(0xbb67ae8584caa73b);
84    c->h[2] = U64(0x3c6ef372fe94f82b);
85    c->h[3] = U64(0xa54ff53a5f1d36f1);
86    c->h[4] = U64(0x510e527fade682d1);
87    c->h[5] = U64(0x9b05688c2b3e6c1f);
88    c->h[6] = U64(0x1f83d9abfb41bd6b);
89    c->h[7] = U64(0x5be0cd19137e2179);
90
91    c->Nl = 0;
92    c->Nh = 0;
93    c->num = 0;
94    c->md_len = SHA512_DIGEST_LENGTH;
95    return 1;
96}
97
/*
 * Forward declaration of the block transform used by the collector and
 * padding code below.  It has internal linkage unless SHA512_ASM is defined;
 * the C definitions later in this file are all compiled only when SHA512_ASM
 * is not defined.
 *
 * ctx - context whose chaining words h[0..7] are updated
 * in  - input data in stream (big-endian) byte order
 * num - number of consecutive blocks to process (the C implementations
 *       advance by SHA_LBLOCK 64-bit words per block)
 */
98# ifndef SHA512_ASM
99static
100# endif
101void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
102
103int SHA512_Final(unsigned char *md, SHA512_CTX *c)
104{
105    unsigned char *p = (unsigned char *)c->u.p;
106    size_t n = c->num;
107
108    p[n] = 0x80;                /* There always is a room for one */
109    n++;
110    if (n > (sizeof(c->u) - 16))
111        memset(p + n, 0, sizeof(c->u) - n), n = 0,
112            sha512_block_data_order(c, p, 1);
113
114    memset(p + n, 0, sizeof(c->u) - 16 - n);
115# ifdef  B_ENDIAN
116    c->u.d[SHA_LBLOCK - 2] = c->Nh;
117    c->u.d[SHA_LBLOCK - 1] = c->Nl;
118# else
119    p[sizeof(c->u) - 1] = (unsigned char)(c->Nl);
120    p[sizeof(c->u) - 2] = (unsigned char)(c->Nl >> 8);
121    p[sizeof(c->u) - 3] = (unsigned char)(c->Nl >> 16);
122    p[sizeof(c->u) - 4] = (unsigned char)(c->Nl >> 24);
123    p[sizeof(c->u) - 5] = (unsigned char)(c->Nl >> 32);
124    p[sizeof(c->u) - 6] = (unsigned char)(c->Nl >> 40);
125    p[sizeof(c->u) - 7] = (unsigned char)(c->Nl >> 48);
126    p[sizeof(c->u) - 8] = (unsigned char)(c->Nl >> 56);
127    p[sizeof(c->u) - 9] = (unsigned char)(c->Nh);
128    p[sizeof(c->u) - 10] = (unsigned char)(c->Nh >> 8);
129    p[sizeof(c->u) - 11] = (unsigned char)(c->Nh >> 16);
130    p[sizeof(c->u) - 12] = (unsigned char)(c->Nh >> 24);
131    p[sizeof(c->u) - 13] = (unsigned char)(c->Nh >> 32);
132    p[sizeof(c->u) - 14] = (unsigned char)(c->Nh >> 40);
133    p[sizeof(c->u) - 15] = (unsigned char)(c->Nh >> 48);
134    p[sizeof(c->u) - 16] = (unsigned char)(c->Nh >> 56);
135# endif
136
137    sha512_block_data_order(c, p, 1);
138
139    if (md == 0)
140        return 0;
141
142    switch (c->md_len) {
143        /* Let compiler decide if it's appropriate to unroll... */
144    case SHA384_DIGEST_LENGTH:
145        for (n = 0; n < SHA384_DIGEST_LENGTH / 8; n++) {
146            SHA_LONG64 t = c->h[n];
147
148            *(md++) = (unsigned char)(t >> 56);
149            *(md++) = (unsigned char)(t >> 48);
150            *(md++) = (unsigned char)(t >> 40);
151            *(md++) = (unsigned char)(t >> 32);
152            *(md++) = (unsigned char)(t >> 24);
153            *(md++) = (unsigned char)(t >> 16);
154            *(md++) = (unsigned char)(t >> 8);
155            *(md++) = (unsigned char)(t);
156        }
157        break;
158    case SHA512_DIGEST_LENGTH:
159        for (n = 0; n < SHA512_DIGEST_LENGTH / 8; n++) {
160            SHA_LONG64 t = c->h[n];
161
162            *(md++) = (unsigned char)(t >> 56);
163            *(md++) = (unsigned char)(t >> 48);
164            *(md++) = (unsigned char)(t >> 40);
165            *(md++) = (unsigned char)(t >> 32);
166            *(md++) = (unsigned char)(t >> 24);
167            *(md++) = (unsigned char)(t >> 16);
168            *(md++) = (unsigned char)(t >> 8);
169            *(md++) = (unsigned char)(t);
170        }
171        break;
172        /* ... as well as make sure md_len is not abused. */
173    default:
174        return 0;
175    }
176
177    return 1;
178}
179
180int SHA384_Final(unsigned char *md, SHA512_CTX *c)
181{
182    return SHA512_Final(md, c);
183}
184
185int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
186{
187    SHA_LONG64 l;
188    unsigned char *p = c->u.p;
189    const unsigned char *data = (const unsigned char *)_data;
190
191    if (len == 0)
192        return 1;
193
194    l = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
195    if (l < c->Nl)
196        c->Nh++;
197    if (sizeof(len) >= 8)
198        c->Nh += (((SHA_LONG64) len) >> 61);
199    c->Nl = l;
200
201    if (c->num != 0) {
202        size_t n = sizeof(c->u) - c->num;
203
204        if (len < n) {
205            memcpy(p + c->num, data, len), c->num += (unsigned int)len;
206            return 1;
207        } else {
208            memcpy(p + c->num, data, n), c->num = 0;
209            len -= n, data += n;
210            sha512_block_data_order(c, p, 1);
211        }
212    }
213
214    if (len >= sizeof(c->u)) {
215# ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
216        if ((size_t)data % sizeof(c->u.d[0]) != 0)
217            while (len >= sizeof(c->u))
218                memcpy(p, data, sizeof(c->u)),
219                    sha512_block_data_order(c, p, 1),
220                    len -= sizeof(c->u), data += sizeof(c->u);
221        else
222# endif
223            sha512_block_data_order(c, data, len / sizeof(c->u)),
224                data += len, len %= sizeof(c->u), data -= len;
225    }
226
227    if (len != 0)
228        memcpy(p, data, len), c->num = (int)len;
229
230    return 1;
231}
232
233int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
234{
235    return SHA512_Update(c, data, len);
236}
237
238void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
239{
240# ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
241    if ((size_t)data % sizeof(c->u.d[0]) != 0)
242        memcpy(c->u.p, data, sizeof(c->u.p)), data = c->u.p;
243# endif
244    sha512_block_data_order(c, data, 1);
245}
246
247unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
248{
249    SHA512_CTX c;
250    static unsigned char m[SHA384_DIGEST_LENGTH];
251
252    if (md == NULL)
253        md = m;
254    SHA384_Init(&c);
255    SHA512_Update(&c, d, n);
256    SHA512_Final(md, &c);
257    OPENSSL_cleanse(&c, sizeof(c));
258    return (md);
259}
260
261unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
262{
263    SHA512_CTX c;
264    static unsigned char m[SHA512_DIGEST_LENGTH];
265
266    if (md == NULL)
267        md = m;
268    SHA512_Init(&c);
269    SHA512_Update(&c, d, n);
270    SHA512_Final(md, &c);
271    OPENSSL_cleanse(&c, sizeof(c));
272    return (md);
273}
274
275# ifndef SHA512_ASM
/*
 * Per-round additive constants: 80 64-bit words, indexed by round number
 * (0..79) in the C implementations of sha512_block_data_order() below.
 * NOTE(review): these look like the standard SHA-384/SHA-512 round constants
 * from FIPS 180-4; verify against the standard before touching any value.
 */
276static const SHA_LONG64 K512[80] = {
277    U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
278    U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
279    U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
280    U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
281    U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
282    U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
283    U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
284    U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
285    U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
286    U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
287    U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
288    U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
289    U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
290    U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
291    U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
292    U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
293    U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
294    U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
295    U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
296    U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
297    U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
298    U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
299    U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
300    U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
301    U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
302    U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
303    U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
304    U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
305    U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
306    U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
307    U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
308    U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
309    U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
310    U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
311    U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
312    U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
313    U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
314    U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
315    U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
316    U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
317};
318
/*
 * Optional compiler/CPU-specific versions of two primitives:
 *
 *   ROTR(a,n)  - rotate the 64-bit value a right by n bits
 *   PULL64(x)  - read the 64-bit big-endian value stored at &x
 *
 * The whole section is skipped when PEDANTIC is defined.  Whatever is left
 * undefined here is supplied by the portable C macros that follow.
 */
319#  ifndef PEDANTIC
/* GCC-compatible compilers with inline assembler enabled. */
320#   if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
/* x86-64: rorq for the rotate; 64-bit load plus bswapq for PULL64. */
321#    if defined(__x86_64) || defined(__x86_64__)
322#     define ROTR(a,n)    ({ SHA_LONG64 ret;              \
323                                asm ("rorq %1,%0"       \
324                                : "=r"(ret)             \
325                                : "J"(n),"0"(a)         \
326                                : "cc"); ret;           })
327#     if !defined(B_ENDIAN)
328#      define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
329                                asm ("bswapq    %0"             \
330                                : "=r"(ret)                     \
331                                : "0"(ret)); ret;               })
332#     endif
/*
 * 32-bit x86: only PULL64 is provided (no ROTR).  The value is read as two
 * 32-bit halves which are byte-swapped individually and recombined with the
 * first word in memory as the high half.
 */
333#    elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
/*
 * I386_ONLY: byte swap built from xchgb/roll instead of bswapl
 * (presumably because bswap is not available on the original 80386).
 */
334#     if defined(I386_ONLY)
335#      define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
336                         unsigned int hi=p[0],lo=p[1];          \
337                                asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
338                                    "roll $16,%%eax; roll $16,%%edx; "\
339                                    "xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
340                                : "=a"(lo),"=d"(hi)             \
341                                : "0"(lo),"1"(hi) : "cc");      \
342                                ((SHA_LONG64)hi)<<32|lo;        })
343#     else
344#      define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
345                         unsigned int hi=p[0],lo=p[1];          \
346                                asm ("bswapl %0; bswapl %1;"    \
347                                : "=r"(lo),"=r"(hi)             \
348                                : "0"(lo),"1"(hi));             \
349                                ((SHA_LONG64)hi)<<32|lo;        })
350#     endif
/* 64-bit PowerPC: rotate via the rotrdi instruction; no PULL64 override. */
351#    elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
352#     define ROTR(a,n)    ({ SHA_LONG64 ret;              \
353                                asm ("rotrdi %0,%1,%2"  \
354                                : "=r"(ret)             \
355                                : "r"(a),"K"(n)); ret;  })
356#    endif
/* Microsoft compilers. */
357#   elif defined(_MSC_VER)
358#    if defined(_WIN64)         /* applies to both IA-64 and AMD64 */
359#     pragma intrinsic(_rotr64)
360#     define ROTR(a,n)    _rotr64((a),n)
361#    endif
362#    if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
/*
 * __pull64be(): big-endian 64-bit load for 32-bit MSVC builds.  The code
 * reads its argument through ecx and leaves the result in edx:eax; there is
 * no return statement, so it presumably relies on the MSVC __fastcall
 * argument-passing and 64-bit return-value conventions.
 * The I386_ONLY version swaps bytes with xchg/rol, the other with bswap.
 */
363#     if defined(I386_ONLY)
364static SHA_LONG64 __fastcall __pull64be(const void *x)
365{
366    _asm mov edx,[ecx + 0]
367    _asm mov eax,[ecx + 4]
368_asm xchg dh, dl
369        _asm xchg ah, al
370        _asm rol edx, 16 _asm rol eax, 16 _asm xchg dh, dl _asm xchg ah, al}
371#     else
372static SHA_LONG64 __fastcall __pull64be(const void *x)
373{
374    _asm mov edx,[ecx + 0]
375    _asm mov eax,[ecx + 4]
376_asm bswap edx _asm bswap eax}
377#     endif
378#     define PULL64(x) __pull64be(&(x))
/*
 * NOTE(review): inlining is turned off for _MSC_VER <= 1200, presumably to
 * keep __pull64be() from being inlined by those compilers -- confirm.
 */
379#     if _MSC_VER<=1200
380#      pragma inline_depth(0)
381#     endif
382#    endif
383#   endif
384#  endif
#  ifndef PULL64
/*
 * Portable fallback: assemble a 64-bit big-endian value byte by byte.
 *
 * B(x,j) reads byte j (0 = first in memory, most significant) of the object
 * x and shifts it into position; PULL64(x) ORs the eight bytes together.
 * x must be an lvalue (its address is taken) and is evaluated eight times,
 * so it must not have side effects.  Both parameters are parenthesized so
 * that argument expressions cannot change the grouping of the expansion.
 */
#   define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
#   define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
#  endif
#  ifndef ROTR
/*
 * Portable fallback: rotate the 64-bit unsigned value x right by s bits.
 *
 * s must be in the range 1..63 (a shift by 64 would be undefined).  Both
 * parameters are fully parenthesized so that an argument such as `1+1`
 * cannot change the shift counts; x is evaluated twice and must therefore
 * be free of side effects.
 */
#   define ROTR(x,s)       (((x) >> (s)) | ((x) << (64 - (s))))
#  endif
/*
 * Bitwise building blocks of the round function; every argument is a
 * 64-bit unsigned value and may be evaluated more than once.
 *
 *   Ch(x,y,z)  - per bit: y where x is 1, z where x is 0
 *   Maj(x,y,z) - per bit: the value held by at least two of x, y, z
 *   Sigma0/1   - XOR of three rotations (used on the working variables)
 *   sigma0/1   - XOR of two rotations and one shift (message schedule)
 */
#  define Ch(x,y,z)       ((z) ^ ((x) & ((y) ^ (z))))
#  define Maj(x,y,z)      (((x) & (y)) | ((z) & ((x) | (y))))
#  define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
#  define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
#  define sigma0(x)       (ROTR((x),1) ^ ROTR((x),8) ^ ((x) >> 7))
#  define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x) >> 6))
398#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
399/*
 * Block transform variant selected for 32-bit x86 builds.
 *
400 * This code should give better results on 32-bit CPU with less than
401 * ~24 registers, both size and performance wise...
 *
 * Only two working variables (A and E) live in locals.  The other six, and
 * the message schedule, live in the array X and are addressed through the
 * pointer F, which moves down by one element per round:
 *
 *   F[0] = a, F[1] = b, F[2] = c, F[3] = d,
 *   F[4] = e, F[5] = f, F[6] = g, F[7] = h,
 *   F[8 + k] = schedule word stored k rounds earlier
 *
 * Storing A in F[0] and E in F[4] and then decrementing F performs the
 * usual rotation of the working variables without copying them.
 *
 * ctx - context whose h[0..7] are updated
 * in  - input blocks in big-endian byte order
 * num - number of blocks to process
402 */ static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
403                                        size_t num)
404{
405    const SHA_LONG64 *W = in;
406    SHA_LONG64 A, E, T;
    /* 80 rounds of downward travel plus the 9-element window F[0..8]. */
407    SHA_LONG64 X[9 + 80], *F;
408    int i;
409
410    while (num--) {
411
        /* Start at the top of X; a and e are kept in A and E. */
412        F = X + 80;
413        A = ctx->h[0];
414        F[1] = ctx->h[1];
415        F[2] = ctx->h[2];
416        F[3] = ctx->h[3];
417        E = ctx->h[4];
418        F[5] = ctx->h[5];
419        F[6] = ctx->h[6];
420        F[7] = ctx->h[7];
421
        /* Rounds 0..15: schedule words come straight from the input. */
422        for (i = 0; i < 16; i++, F--) {
423#   ifdef B_ENDIAN
424            T = W[i];
425#   else
426            T = PULL64(W[i]);
427#   endif
            /* Park a, e and the schedule word in the window. */
428            F[0] = A;
429            F[4] = E;
430            F[8] = T;
431            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
            /* New e = d + T1; new a = T1 + Sigma0(a) + Maj(a,b,c). */
432            E = F[3] + T;
433            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
434        }
435
        /*
         * Rounds 16..79: the new schedule word combines the words stored
         * 15, 2, 16 and 7 rounds ago (F[8+15], F[8+2], F[8+16], F[8+7]).
         */
436        for (; i < 80; i++, F--) {
437            T = sigma0(F[8 + 16 - 1]);
438            T += sigma1(F[8 + 16 - 14]);
439            T += F[8 + 16] + F[8 + 16 - 9];
440
441            F[0] = A;
442            F[4] = E;
443            F[8] = T;
444            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
445            E = F[3] + T;
446            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
447        }
448
        /* Fold the working variables back into the chaining value. */
449        ctx->h[0] += A;
450        ctx->h[1] += F[1];
451        ctx->h[2] += F[2];
452        ctx->h[3] += F[3];
453        ctx->h[4] += E;
454        ctx->h[5] += F[5];
455        ctx->h[6] += F[6];
456        ctx->h[7] += F[7];
457
        /* Advance to the next input block. */
458        W += SHA_LBLOCK;
459    }
460}
461
462#  elif defined(OPENSSL_SMALL_FOOTPRINT)
463static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
464                                    size_t num)
465{
466    const SHA_LONG64 *W = in;
467    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1, T2;
468    SHA_LONG64 X[16];
469    int i;
470
471    while (num--) {
472
473        a = ctx->h[0];
474        b = ctx->h[1];
475        c = ctx->h[2];
476        d = ctx->h[3];
477        e = ctx->h[4];
478        f = ctx->h[5];
479        g = ctx->h[6];
480        h = ctx->h[7];
481
482        for (i = 0; i < 16; i++) {
483#   ifdef B_ENDIAN
484            T1 = X[i] = W[i];
485#   else
486            T1 = X[i] = PULL64(W[i]);
487#   endif
488            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
489            T2 = Sigma0(a) + Maj(a, b, c);
490            h = g;
491            g = f;
492            f = e;
493            e = d + T1;
494            d = c;
495            c = b;
496            b = a;
497            a = T1 + T2;
498        }
499
500        for (; i < 80; i++) {
501            s0 = X[(i + 1) & 0x0f];
502            s0 = sigma0(s0);
503            s1 = X[(i + 14) & 0x0f];
504            s1 = sigma1(s1);
505
506            T1 = X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
507            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
508            T2 = Sigma0(a) + Maj(a, b, c);
509            h = g;
510            g = f;
511            f = e;
512            e = d + T1;
513            d = c;
514            c = b;
515            b = a;
516            a = T1 + T2;
517        }
518
519        ctx->h[0] += a;
520        ctx->h[1] += b;
521        ctx->h[2] += c;
522        ctx->h[3] += d;
523        ctx->h[4] += e;
524        ctx->h[5] += f;
525        ctx->h[6] += g;
526        ctx->h[7] += h;
527
528        W += SHA_LBLOCK;
529    }
530}
531
532#  else
533#   define ROUND_00_15(i,a,b,c,d,e,f,g,h)          do {    \
534        T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
535        h = Sigma0(a) + Maj(a,b,c);                     \
536        d += T1;        h += T1;                } while (0)
537#   define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)      do {    \
538        s0 = X[(j+1)&0x0f];     s0 = sigma0(s0);        \
539        s1 = X[(j+14)&0x0f];    s1 = sigma1(s1);        \
540        T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];    \
541        ROUND_00_15(i+j,a,b,c,d,e,f,g,h);               } while (0)
542static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
543                                    size_t num)
544{
545    const SHA_LONG64 *W = in;
546    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
547    SHA_LONG64 X[16];
548    int i;
549
550    while (num--) {
551
552        a = ctx->h[0];
553        b = ctx->h[1];
554        c = ctx->h[2];
555        d = ctx->h[3];
556        e = ctx->h[4];
557        f = ctx->h[5];
558        g = ctx->h[6];
559        h = ctx->h[7];
560
561#   ifdef B_ENDIAN
562        T1 = X[0] = W[0];
563        ROUND_00_15(0, a, b, c, d, e, f, g, h);
564        T1 = X[1] = W[1];
565        ROUND_00_15(1, h, a, b, c, d, e, f, g);
566        T1 = X[2] = W[2];
567        ROUND_00_15(2, g, h, a, b, c, d, e, f);
568        T1 = X[3] = W[3];
569        ROUND_00_15(3, f, g, h, a, b, c, d, e);
570        T1 = X[4] = W[4];
571        ROUND_00_15(4, e, f, g, h, a, b, c, d);
572        T1 = X[5] = W[5];
573        ROUND_00_15(5, d, e, f, g, h, a, b, c);
574        T1 = X[6] = W[6];
575        ROUND_00_15(6, c, d, e, f, g, h, a, b);
576        T1 = X[7] = W[7];
577        ROUND_00_15(7, b, c, d, e, f, g, h, a);
578        T1 = X[8] = W[8];
579        ROUND_00_15(8, a, b, c, d, e, f, g, h);
580        T1 = X[9] = W[9];
581        ROUND_00_15(9, h, a, b, c, d, e, f, g);
582        T1 = X[10] = W[10];
583        ROUND_00_15(10, g, h, a, b, c, d, e, f);
584        T1 = X[11] = W[11];
585        ROUND_00_15(11, f, g, h, a, b, c, d, e);
586        T1 = X[12] = W[12];
587        ROUND_00_15(12, e, f, g, h, a, b, c, d);
588        T1 = X[13] = W[13];
589        ROUND_00_15(13, d, e, f, g, h, a, b, c);
590        T1 = X[14] = W[14];
591        ROUND_00_15(14, c, d, e, f, g, h, a, b);
592        T1 = X[15] = W[15];
593        ROUND_00_15(15, b, c, d, e, f, g, h, a);
594#   else
595        T1 = X[0] = PULL64(W[0]);
596        ROUND_00_15(0, a, b, c, d, e, f, g, h);
597        T1 = X[1] = PULL64(W[1]);
598        ROUND_00_15(1, h, a, b, c, d, e, f, g);
599        T1 = X[2] = PULL64(W[2]);
600        ROUND_00_15(2, g, h, a, b, c, d, e, f);
601        T1 = X[3] = PULL64(W[3]);
602        ROUND_00_15(3, f, g, h, a, b, c, d, e);
603        T1 = X[4] = PULL64(W[4]);
604        ROUND_00_15(4, e, f, g, h, a, b, c, d);
605        T1 = X[5] = PULL64(W[5]);
606        ROUND_00_15(5, d, e, f, g, h, a, b, c);
607        T1 = X[6] = PULL64(W[6]);
608        ROUND_00_15(6, c, d, e, f, g, h, a, b);
609        T1 = X[7] = PULL64(W[7]);
610        ROUND_00_15(7, b, c, d, e, f, g, h, a);
611        T1 = X[8] = PULL64(W[8]);
612        ROUND_00_15(8, a, b, c, d, e, f, g, h);
613        T1 = X[9] = PULL64(W[9]);
614        ROUND_00_15(9, h, a, b, c, d, e, f, g);
615        T1 = X[10] = PULL64(W[10]);
616        ROUND_00_15(10, g, h, a, b, c, d, e, f);
617        T1 = X[11] = PULL64(W[11]);
618        ROUND_00_15(11, f, g, h, a, b, c, d, e);
619        T1 = X[12] = PULL64(W[12]);
620        ROUND_00_15(12, e, f, g, h, a, b, c, d);
621        T1 = X[13] = PULL64(W[13]);
622        ROUND_00_15(13, d, e, f, g, h, a, b, c);
623        T1 = X[14] = PULL64(W[14]);
624        ROUND_00_15(14, c, d, e, f, g, h, a, b);
625        T1 = X[15] = PULL64(W[15]);
626        ROUND_00_15(15, b, c, d, e, f, g, h, a);
627#   endif
628
629        for (i = 16; i < 80; i += 16) {
630            ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
631            ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
632            ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
633            ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
634            ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
635            ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
636            ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
637            ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
638            ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
639            ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
640            ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
641            ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
642            ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
643            ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
644            ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
645            ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
646        }
647
648        ctx->h[0] += a;
649        ctx->h[1] += b;
650        ctx->h[2] += c;
651        ctx->h[3] += d;
652        ctx->h[4] += e;
653        ctx->h[5] += f;
654        ctx->h[6] += g;
655        ctx->h[7] += h;
656
657        W += SHA_LBLOCK;
658    }
659}
660
661#  endif
662
663# endif                         /* SHA512_ASM */
664
665#else                           /* !OPENSSL_NO_SHA512 */
666
/*
 * Compiled only when SHA-512 support is configured out.  For PEDANTIC
 * builds, DEC C and Mac OS X a self-referencing static pointer is defined;
 * presumably this keeps the translation unit from being empty -- confirm.
 */
667# if defined(PEDANTIC) || defined(__DECC) || defined(OPENSSL_SYS_MACOSX)
668static void *dummy = &dummy;
669# endif
670
671#endif                          /* !OPENSSL_NO_SHA512 */
672