sha512.c revision 296465
1187498Simp/* crypto/sha/sha512.c */
2187498Simp/* ====================================================================
3187498Simp * Copyright (c) 2004 The OpenSSL Project.  All rights reserved
4187498Simp * according to the OpenSSL license [found in ../../LICENSE].
5187498Simp * ====================================================================
6187498Simp */
7187498Simp#include <openssl/opensslconf.h>
8187498Simp#ifdef OPENSSL_FIPS
9187498Simp# include <openssl/fips.h>
10187498Simp#endif
11187498Simp
12187498Simp#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
13187498Simp/*-
14187498Simp * IMPLEMENTATION NOTES.
15187498Simp *
16187498Simp * As you might have noticed 32-bit hash algorithms:
17187498Simp *
18187498Simp * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
19187498Simp * - optimized versions implement two transform functions: one operating
20187498Simp *   on [aligned] data in host byte order and one - on data in input
21187498Simp *   stream byte order;
22187498Simp * - share common byte-order neutral collector and padding function
23187498Simp *   implementations, ../md32_common.h;
24187498Simp *
 * None of the above applies to this SHA-512 implementation. Reasons
 * [in reverse order] are:
27187498Simp *
 * - it's the only 64-bit hash algorithm at the time of this writing,
 *   so there is no need for a common collector/padding implementation [yet];
30187498Simp * - by supporting only one transform function [which operates on
31187498Simp *   *aligned* data in input stream byte order, big-endian in this case]
32187498Simp *   we minimize burden of maintenance in two ways: a) collector/padding
33187498Simp *   function is simpler; b) only one transform function to stare at;
34187498Simp * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
35187498Simp *   apply a number of optimizations to mitigate potential performance
36187498Simp *   penalties caused by previous design decision;
37187498Simp *
38187498Simp * Caveat lector.
39187498Simp *
40187498Simp * Implementation relies on the fact that "long long" is 64-bit on
41187498Simp * both 32- and 64-bit platforms. If some compiler vendor comes up
42187498Simp * with 128-bit long long, adjustment to sha.h would be required.
43187498Simp * As this implementation relies on 64-bit integer type, it's totally
44187498Simp * inappropriate for platforms which don't support it, most notably
45187498Simp * 16-bit platforms.
46187498Simp *                                      <appro@fy.chalmers.se>
47187498Simp */
48187498Simp# include <stdlib.h>
49187498Simp# include <string.h>
50187498Simp
51187498Simp# include <openssl/crypto.h>
52187498Simp# include <openssl/sha.h>
53187498Simp# include <openssl/opensslv.h>
54187498Simp
55187498Simp# include "cryptlib.h"
56187498Simp
57187498Simpconst char SHA512_version[] = "SHA-512" OPENSSL_VERSION_PTEXT;
58187498Simp
59187498Simp# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
60187498Simp    defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
61187498Simp    defined(__s390__) || defined(__s390x__) || \
62187498Simp    defined(SHA512_ASM)
63187498Simp#  define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
64187498Simp# endif
65187498Simp
66187498Simpint SHA384_Init(SHA512_CTX *c)
67187498Simp{
68187498Simp# ifdef OPENSSL_FIPS
69187498Simp    FIPS_selftest_check();
70187498Simp# endif
71187498Simp    c->h[0] = U64(0xcbbb9d5dc1059ed8);
72187498Simp    c->h[1] = U64(0x629a292a367cd507);
73187498Simp    c->h[2] = U64(0x9159015a3070dd17);
74187498Simp    c->h[3] = U64(0x152fecd8f70e5939);
75187498Simp    c->h[4] = U64(0x67332667ffc00b31);
76187498Simp    c->h[5] = U64(0x8eb44a8768581511);
77187498Simp    c->h[6] = U64(0xdb0c2e0d64f98fa7);
78187498Simp    c->h[7] = U64(0x47b5481dbefa4fa4);
79187498Simp    c->Nl = 0;
80187498Simp    c->Nh = 0;
81187498Simp    c->num = 0;
82187498Simp    c->md_len = SHA384_DIGEST_LENGTH;
83187498Simp    return 1;
84340145Smmacy}
85187498Simp
86187498Simpint SHA512_Init(SHA512_CTX *c)
87187498Simp{
88# ifdef OPENSSL_FIPS
89    FIPS_selftest_check();
90# endif
91    c->h[0] = U64(0x6a09e667f3bcc908);
92    c->h[1] = U64(0xbb67ae8584caa73b);
93    c->h[2] = U64(0x3c6ef372fe94f82b);
94    c->h[3] = U64(0xa54ff53a5f1d36f1);
95    c->h[4] = U64(0x510e527fade682d1);
96    c->h[5] = U64(0x9b05688c2b3e6c1f);
97    c->h[6] = U64(0x1f83d9abfb41bd6b);
98    c->h[7] = U64(0x5be0cd19137e2179);
99    c->Nl = 0;
100    c->Nh = 0;
101    c->num = 0;
102    c->md_len = SHA512_DIGEST_LENGTH;
103    return 1;
104}
105
106# ifndef SHA512_ASM
107static
108# endif
109void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
110
111int SHA512_Final(unsigned char *md, SHA512_CTX *c)
112{
113    unsigned char *p = (unsigned char *)c->u.p;
114    size_t n = c->num;
115
116    p[n] = 0x80;                /* There always is a room for one */
117    n++;
118    if (n > (sizeof(c->u) - 16))
119        memset(p + n, 0, sizeof(c->u) - n), n = 0,
120            sha512_block_data_order(c, p, 1);
121
122    memset(p + n, 0, sizeof(c->u) - 16 - n);
123# ifdef  B_ENDIAN
124    c->u.d[SHA_LBLOCK - 2] = c->Nh;
125    c->u.d[SHA_LBLOCK - 1] = c->Nl;
126# else
127    p[sizeof(c->u) - 1] = (unsigned char)(c->Nl);
128    p[sizeof(c->u) - 2] = (unsigned char)(c->Nl >> 8);
129    p[sizeof(c->u) - 3] = (unsigned char)(c->Nl >> 16);
130    p[sizeof(c->u) - 4] = (unsigned char)(c->Nl >> 24);
131    p[sizeof(c->u) - 5] = (unsigned char)(c->Nl >> 32);
132    p[sizeof(c->u) - 6] = (unsigned char)(c->Nl >> 40);
133    p[sizeof(c->u) - 7] = (unsigned char)(c->Nl >> 48);
134    p[sizeof(c->u) - 8] = (unsigned char)(c->Nl >> 56);
135    p[sizeof(c->u) - 9] = (unsigned char)(c->Nh);
136    p[sizeof(c->u) - 10] = (unsigned char)(c->Nh >> 8);
137    p[sizeof(c->u) - 11] = (unsigned char)(c->Nh >> 16);
138    p[sizeof(c->u) - 12] = (unsigned char)(c->Nh >> 24);
139    p[sizeof(c->u) - 13] = (unsigned char)(c->Nh >> 32);
140    p[sizeof(c->u) - 14] = (unsigned char)(c->Nh >> 40);
141    p[sizeof(c->u) - 15] = (unsigned char)(c->Nh >> 48);
142    p[sizeof(c->u) - 16] = (unsigned char)(c->Nh >> 56);
143# endif
144
145    sha512_block_data_order(c, p, 1);
146
147    if (md == 0)
148        return 0;
149
150    switch (c->md_len) {
151        /* Let compiler decide if it's appropriate to unroll... */
152    case SHA384_DIGEST_LENGTH:
153        for (n = 0; n < SHA384_DIGEST_LENGTH / 8; n++) {
154            SHA_LONG64 t = c->h[n];
155
156            *(md++) = (unsigned char)(t >> 56);
157            *(md++) = (unsigned char)(t >> 48);
158            *(md++) = (unsigned char)(t >> 40);
159            *(md++) = (unsigned char)(t >> 32);
160            *(md++) = (unsigned char)(t >> 24);
161            *(md++) = (unsigned char)(t >> 16);
162            *(md++) = (unsigned char)(t >> 8);
163            *(md++) = (unsigned char)(t);
164        }
165        break;
166    case SHA512_DIGEST_LENGTH:
167        for (n = 0; n < SHA512_DIGEST_LENGTH / 8; n++) {
168            SHA_LONG64 t = c->h[n];
169
170            *(md++) = (unsigned char)(t >> 56);
171            *(md++) = (unsigned char)(t >> 48);
172            *(md++) = (unsigned char)(t >> 40);
173            *(md++) = (unsigned char)(t >> 32);
174            *(md++) = (unsigned char)(t >> 24);
175            *(md++) = (unsigned char)(t >> 16);
176            *(md++) = (unsigned char)(t >> 8);
177            *(md++) = (unsigned char)(t);
178        }
179        break;
180        /* ... as well as make sure md_len is not abused. */
181    default:
182        return 0;
183    }
184
185    return 1;
186}
187
188int SHA384_Final(unsigned char *md, SHA512_CTX *c)
189{
190    return SHA512_Final(md, c);
191}
192
193int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
194{
195    SHA_LONG64 l;
196    unsigned char *p = c->u.p;
197    const unsigned char *data = (const unsigned char *)_data;
198
199    if (len == 0)
200        return 1;
201
202    l = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
203    if (l < c->Nl)
204        c->Nh++;
205    if (sizeof(len) >= 8)
206        c->Nh += (((SHA_LONG64) len) >> 61);
207    c->Nl = l;
208
209    if (c->num != 0) {
210        size_t n = sizeof(c->u) - c->num;
211
212        if (len < n) {
213            memcpy(p + c->num, data, len), c->num += len;
214            return 1;
215        } else {
216            memcpy(p + c->num, data, n), c->num = 0;
217            len -= n, data += n;
218            sha512_block_data_order(c, p, 1);
219        }
220    }
221
222    if (len >= sizeof(c->u)) {
223# ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
224        if ((size_t)data % sizeof(c->u.d[0]) != 0)
225            while (len >= sizeof(c->u))
226                memcpy(p, data, sizeof(c->u)),
227                    sha512_block_data_order(c, p, 1),
228                    len -= sizeof(c->u), data += sizeof(c->u);
229        else
230# endif
231            sha512_block_data_order(c, data, len / sizeof(c->u)),
232                data += len, len %= sizeof(c->u), data -= len;
233    }
234
235    if (len != 0)
236        memcpy(p, data, len), c->num = (int)len;
237
238    return 1;
239}
240
241int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
242{
243    return SHA512_Update(c, data, len);
244}
245
246void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
247{
248    sha512_block_data_order(c, data, 1);
249}
250
251unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
252{
253    SHA512_CTX c;
254    static unsigned char m[SHA384_DIGEST_LENGTH];
255
256    if (md == NULL)
257        md = m;
258    SHA384_Init(&c);
259    SHA512_Update(&c, d, n);
260    SHA512_Final(md, &c);
261    OPENSSL_cleanse(&c, sizeof(c));
262    return (md);
263}
264
265unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
266{
267    SHA512_CTX c;
268    static unsigned char m[SHA512_DIGEST_LENGTH];
269
270    if (md == NULL)
271        md = m;
272    SHA512_Init(&c);
273    SHA512_Update(&c, d, n);
274    SHA512_Final(md, &c);
275    OPENSSL_cleanse(&c, sizeof(c));
276    return (md);
277}
278
279# ifndef SHA512_ASM
280static const SHA_LONG64 K512[80] = {
281    U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
282    U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
283    U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
284    U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
285    U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
286    U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
287    U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
288    U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
289    U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
290    U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
291    U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
292    U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
293    U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
294    U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
295    U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
296    U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
297    U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
298    U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
299    U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
300    U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
301    U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
302    U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
303    U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
304    U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
305    U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
306    U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
307    U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
308    U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
309    U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
310    U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
311    U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
312    U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
313    U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
314    U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
315    U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
316    U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
317    U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
318    U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
319    U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
320    U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
321};
322
#  ifndef PEDANTIC
/*
 * Compiler- and CPU-specific fast paths for ROTR (64-bit rotate right) and
 * PULL64 (read a big-endian 64-bit word).  Whatever is left undefined here
 * is supplied by the portable fallbacks that follow; defining PEDANTIC
 * skips this section altogether.
 *
 * The inline-asm results are declared as SHA_LONG64 rather than
 * unsigned long, so the rotate keeps all 64 bits on targets where long is
 * only 32 bits wide.  __asm__ is used instead of asm so the macros also
 * compile in strict ISO modes, where asm is not a keyword.
 */
#   if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#    if defined(__x86_64) || defined(__x86_64__)
/* n has to be a compile-time constant ("J" constraint). */
#     define ROTR(a,n)    ({ SHA_LONG64 ret;              \
                                __asm__ ("rorq %1,%0"   \
                                : "=r"(ret)             \
                                : "J"(n),"0"(a)         \
                                : "cc"); ret;           })
#     if !defined(B_ENDIAN)
/* Loads x as a 64-bit word and byte-swaps it. */
#      define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
                                __asm__ ("bswapq    %0"         \
                                : "=r"(ret)                     \
                                : "0"(ret)); ret;               })
#     endif
#    elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
#     if defined(I386_ONLY)
/* Byte swap built from xchgb/roll only (no bswap instruction). */
#      define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                         unsigned int hi=p[0],lo=p[1];          \
                                __asm__("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
                                    "roll $16,%%eax; roll $16,%%edx; "\
                                    "xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
                                : "=a"(lo),"=d"(hi)             \
                                : "0"(lo),"1"(hi) : "cc");      \
                                ((SHA_LONG64)hi)<<32|lo;        })
#     else
/* Two 32-bit byte swaps; the first word in memory becomes the high half. */
#      define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                         unsigned int hi=p[0],lo=p[1];                  \
                                __asm__ ("bswapl %0; bswapl %1;"        \
                                : "=r"(lo),"=r"(hi)             \
                                : "0"(lo),"1"(hi));             \
                                ((SHA_LONG64)hi)<<32|lo;        })
#     endif
#    elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
/* n has to be a compile-time constant ("K" constraint). */
#     define ROTR(a,n)    ({ SHA_LONG64 ret;              \
                                __asm__ ("rotrdi %0,%1,%2"      \
                                : "=r"(ret)             \
                                : "r"(a),"K"(n)); ret;  })
#    endif
#   elif defined(_MSC_VER)
#    if defined(_WIN64)         /* applies to both IA-64 and AMD64 */
#     define ROTR(a,n)    _rotr64((a),n)
#    endif
#    if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
/*
 * __pull64be reads the two 32-bit halves at x into edx and eax and
 * byte-swaps each.  There is no return statement: the code relies on
 * __fastcall delivering x in ecx and on edx:eax carrying the 64-bit
 * return value.
 */
#     if defined(I386_ONLY)
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov edx,[ecx + 0]
    _asm mov eax,[ecx + 4]
    _asm xchg dh, dl
    _asm xchg ah, al
    _asm rol edx, 16
    _asm rol eax, 16
    _asm xchg dh, dl
    _asm xchg ah, al
}
#     else
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov edx,[ecx + 0]
    _asm mov eax,[ecx + 4]
    _asm bswap edx
    _asm bswap eax
}
#     endif
#     define PULL64(x) __pull64be(&(x))
#     if _MSC_VER<=1200
#      pragma inline_depth(0)
#     endif
#    endif
#   endif
#  endif
#  ifndef PULL64
/*
 * Portable fallbacks.  B(x,j) fetches byte j of the object x and shifts it
 * to where it sits in a big-endian 64-bit word; PULL64(x) ORs all eight
 * bytes together.  x must be an lvalue, because its address is taken.
 * Every macro argument is parenthesized so that expression arguments
 * expand correctly.
 */
#   define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
#   define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
#  endif
#  ifndef ROTR
/* 64-bit rotate right; s must lie in 1..63 so that neither shift reaches 64. */
#   define ROTR(x,s)       (((x)>>(s)) | ((x)<<(64-(s))))
#  endif
/* Mixing functions used by the rounds and by the message schedule. */
#  define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
#  define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
#  define sigma0(x)       (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
#  define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
#  define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
#  define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
#  if defined(OPENSSL_IA32_SSE2) && !defined(OPENSSL_NO_ASM) && !defined(I386_ONLY)
/*
 * When bit 26 of OPENSSL_ia32cap_P is set, pass the whole request to
 * sha512_block_sse2 and return from the calling function; otherwise fall
 * through to the C code that follows the macro invocation.
 */
#   define GO_FOR_SSE2(ctx,in,num)         do {            \
        void    sha512_block_sse2(void *,const void *,size_t);  \
        if (!(OPENSSL_ia32cap_P & (1<<26))) break;      \
        sha512_block_sse2(ctx->h,in,num); return;       \
                                        } while (0)
#  endif
408#  ifdef OPENSSL_SMALL_FOOTPRINT
409static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
410                                    size_t num)
411{
412    const SHA_LONG64 *W = in;
413    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1, T2;
414    SHA_LONG64 X[16];
415    int i;
416
417#   ifdef GO_FOR_SSE2
418    GO_FOR_SSE2(ctx, in, num);
419#   endif
420
421    while (num--) {
422
423        a = ctx->h[0];
424        b = ctx->h[1];
425        c = ctx->h[2];
426        d = ctx->h[3];
427        e = ctx->h[4];
428        f = ctx->h[5];
429        g = ctx->h[6];
430        h = ctx->h[7];
431
432        for (i = 0; i < 16; i++) {
433#   ifdef B_ENDIAN
434            T1 = X[i] = W[i];
435#   else
436            T1 = X[i] = PULL64(W[i]);
437#   endif
438            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
439            T2 = Sigma0(a) + Maj(a, b, c);
440            h = g;
441            g = f;
442            f = e;
443            e = d + T1;
444            d = c;
445            c = b;
446            b = a;
447            a = T1 + T2;
448        }
449
450        for (; i < 80; i++) {
451            s0 = X[(i + 1) & 0x0f];
452            s0 = sigma0(s0);
453            s1 = X[(i + 14) & 0x0f];
454            s1 = sigma1(s1);
455
456            T1 = X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
457            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
458            T2 = Sigma0(a) + Maj(a, b, c);
459            h = g;
460            g = f;
461            f = e;
462            e = d + T1;
463            d = c;
464            c = b;
465            b = a;
466            a = T1 + T2;
467        }
468
469        ctx->h[0] += a;
470        ctx->h[1] += b;
471        ctx->h[2] += c;
472        ctx->h[3] += d;
473        ctx->h[4] += e;
474        ctx->h[5] += f;
475        ctx->h[6] += g;
476        ctx->h[7] += h;
477
478        W += SHA_LBLOCK;
479    }
480}
481
482#  else
483#   define ROUND_00_15(i,a,b,c,d,e,f,g,h)          do {    \
484        T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
485        h = Sigma0(a) + Maj(a,b,c);                     \
486        d += T1;        h += T1;                } while (0)
487#   define ROUND_16_80(i,a,b,c,d,e,f,g,h,X)        do {    \
488        s0 = X[(i+1)&0x0f];     s0 = sigma0(s0);        \
489        s1 = X[(i+14)&0x0f];    s1 = sigma1(s1);        \
490        T1 = X[(i)&0x0f] += s0 + s1 + X[(i+9)&0x0f];    \
491        ROUND_00_15(i,a,b,c,d,e,f,g,h);         } while (0)
492static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
493                                    size_t num)
494{
495    const SHA_LONG64 *W = in;
496    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
497    SHA_LONG64 X[16];
498    int i;
499
500#   ifdef GO_FOR_SSE2
501    GO_FOR_SSE2(ctx, in, num);
502#   endif
503
504    while (num--) {
505
506        a = ctx->h[0];
507        b = ctx->h[1];
508        c = ctx->h[2];
509        d = ctx->h[3];
510        e = ctx->h[4];
511        f = ctx->h[5];
512        g = ctx->h[6];
513        h = ctx->h[7];
514
515#   ifdef B_ENDIAN
516        T1 = X[0] = W[0];
517        ROUND_00_15(0, a, b, c, d, e, f, g, h);
518        T1 = X[1] = W[1];
519        ROUND_00_15(1, h, a, b, c, d, e, f, g);
520        T1 = X[2] = W[2];
521        ROUND_00_15(2, g, h, a, b, c, d, e, f);
522        T1 = X[3] = W[3];
523        ROUND_00_15(3, f, g, h, a, b, c, d, e);
524        T1 = X[4] = W[4];
525        ROUND_00_15(4, e, f, g, h, a, b, c, d);
526        T1 = X[5] = W[5];
527        ROUND_00_15(5, d, e, f, g, h, a, b, c);
528        T1 = X[6] = W[6];
529        ROUND_00_15(6, c, d, e, f, g, h, a, b);
530        T1 = X[7] = W[7];
531        ROUND_00_15(7, b, c, d, e, f, g, h, a);
532        T1 = X[8] = W[8];
533        ROUND_00_15(8, a, b, c, d, e, f, g, h);
534        T1 = X[9] = W[9];
535        ROUND_00_15(9, h, a, b, c, d, e, f, g);
536        T1 = X[10] = W[10];
537        ROUND_00_15(10, g, h, a, b, c, d, e, f);
538        T1 = X[11] = W[11];
539        ROUND_00_15(11, f, g, h, a, b, c, d, e);
540        T1 = X[12] = W[12];
541        ROUND_00_15(12, e, f, g, h, a, b, c, d);
542        T1 = X[13] = W[13];
543        ROUND_00_15(13, d, e, f, g, h, a, b, c);
544        T1 = X[14] = W[14];
545        ROUND_00_15(14, c, d, e, f, g, h, a, b);
546        T1 = X[15] = W[15];
547        ROUND_00_15(15, b, c, d, e, f, g, h, a);
548#   else
549        T1 = X[0] = PULL64(W[0]);
550        ROUND_00_15(0, a, b, c, d, e, f, g, h);
551        T1 = X[1] = PULL64(W[1]);
552        ROUND_00_15(1, h, a, b, c, d, e, f, g);
553        T1 = X[2] = PULL64(W[2]);
554        ROUND_00_15(2, g, h, a, b, c, d, e, f);
555        T1 = X[3] = PULL64(W[3]);
556        ROUND_00_15(3, f, g, h, a, b, c, d, e);
557        T1 = X[4] = PULL64(W[4]);
558        ROUND_00_15(4, e, f, g, h, a, b, c, d);
559        T1 = X[5] = PULL64(W[5]);
560        ROUND_00_15(5, d, e, f, g, h, a, b, c);
561        T1 = X[6] = PULL64(W[6]);
562        ROUND_00_15(6, c, d, e, f, g, h, a, b);
563        T1 = X[7] = PULL64(W[7]);
564        ROUND_00_15(7, b, c, d, e, f, g, h, a);
565        T1 = X[8] = PULL64(W[8]);
566        ROUND_00_15(8, a, b, c, d, e, f, g, h);
567        T1 = X[9] = PULL64(W[9]);
568        ROUND_00_15(9, h, a, b, c, d, e, f, g);
569        T1 = X[10] = PULL64(W[10]);
570        ROUND_00_15(10, g, h, a, b, c, d, e, f);
571        T1 = X[11] = PULL64(W[11]);
572        ROUND_00_15(11, f, g, h, a, b, c, d, e);
573        T1 = X[12] = PULL64(W[12]);
574        ROUND_00_15(12, e, f, g, h, a, b, c, d);
575        T1 = X[13] = PULL64(W[13]);
576        ROUND_00_15(13, d, e, f, g, h, a, b, c);
577        T1 = X[14] = PULL64(W[14]);
578        ROUND_00_15(14, c, d, e, f, g, h, a, b);
579        T1 = X[15] = PULL64(W[15]);
580        ROUND_00_15(15, b, c, d, e, f, g, h, a);
581#   endif
582
583        for (i = 16; i < 80; i += 8) {
584            ROUND_16_80(i + 0, a, b, c, d, e, f, g, h, X);
585            ROUND_16_80(i + 1, h, a, b, c, d, e, f, g, X);
586            ROUND_16_80(i + 2, g, h, a, b, c, d, e, f, X);
587            ROUND_16_80(i + 3, f, g, h, a, b, c, d, e, X);
588            ROUND_16_80(i + 4, e, f, g, h, a, b, c, d, X);
589            ROUND_16_80(i + 5, d, e, f, g, h, a, b, c, X);
590            ROUND_16_80(i + 6, c, d, e, f, g, h, a, b, X);
591            ROUND_16_80(i + 7, b, c, d, e, f, g, h, a, X);
592        }
593
594        ctx->h[0] += a;
595        ctx->h[1] += b;
596        ctx->h[2] += c;
597        ctx->h[3] += d;
598        ctx->h[4] += e;
599        ctx->h[5] += f;
600        ctx->h[6] += g;
601        ctx->h[7] += h;
602
603        W += SHA_LBLOCK;
604    }
605}
606
607#  endif
608
609# endif                         /* SHA512_ASM */
610
611#else                           /* OPENSSL_NO_SHA512 */
612
613/*
614 * Sensitive compilers ("Compaq C V6.4-005 on OpenVMS VAX V7.3", for example)
615 * dislike a statement-free file, complaining: "%CC-W-EMPTYFILE, Source file
616 * does not contain any declarations."
617 */
618
/* Placeholder declaration so this translation unit is not empty. */
int sha512_dummy(void);
620
621#endif                          /* OPENSSL_NO_SHA512 */
622