1/*
2 * Copyright 2004-2021 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License 2.0 (the "License").  You may not use
5 * this file except in compliance with the License.  You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10/*
11 * SHA512 low level APIs are deprecated for public use, but still ok for
12 * internal use.
13 */
14#include "internal/deprecated.h"
15
16#include <stdio.h>
17#include <openssl/opensslconf.h>
18/*-
19 * IMPLEMENTATION NOTES.
20 *
21 * As you might have noticed 32-bit hash algorithms:
22 *
23 * - permit SHA_LONG to be wider than 32-bit
24 * - optimized versions implement two transform functions: one operating
25 *   on [aligned] data in host byte order and one - on data in input
26 *   stream byte order;
27 * - share common byte-order neutral collector and padding function
28 *   implementations, ../md32_common.h;
29 *
30 * Neither of the above applies to this SHA-512 implementations. Reasons
31 * [in reverse order] are:
32 *
33 * - it's the only 64-bit hash algorithm for the moment of this writing,
34 *   there is no need for common collector/padding implementation [yet];
35 * - by supporting only one transform function [which operates on
36 *   *aligned* data in input stream byte order, big-endian in this case]
37 *   we minimize burden of maintenance in two ways: a) collector/padding
38 *   function is simpler; b) only one transform function to stare at;
39 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
40 *   apply a number of optimizations to mitigate potential performance
41 *   penalties caused by previous design decision;
42 *
43 * Caveat lector.
44 *
45 * Implementation relies on the fact that "long long" is 64-bit on
46 * both 32- and 64-bit platforms. If some compiler vendor comes up
47 * with 128-bit long long, adjustment to sha.h would be required.
48 * As this implementation relies on 64-bit integer type, it's totally
49 * inappropriate for platforms which don't support it, most notably
50 * 16-bit platforms.
51 */
52#include <stdlib.h>
53#include <string.h>
54
55#include <openssl/crypto.h>
56#include <openssl/sha.h>
57#include <openssl/opensslv.h>
58
59#include "internal/cryptlib.h"
60#include "crypto/sha.h"
61
/*
 * On these targets (and whenever an assembler back end is used) the block
 * function may load 64-bit words from unaligned addresses, so input does
 * not need to be staged through the aligned context buffer c->u.
 */
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
    defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
    defined(__s390__) || defined(__s390x__) || \
    defined(__aarch64__) || \
    defined(SHA512_ASM)
# define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
#endif

/* Spell a 64-bit constant with whatever suffix this compiler understands. */
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
# define U64(C)     C##UI64
#elif defined(__arch64__)      /* 64-bit SPARC ABIs define __arch64__ */
# define U64(C)     C##UL
#else
# define U64(C)     C##ULL
#endif
77
78int sha512_224_init(SHA512_CTX *c)
79{
80    c->h[0] = U64(0x8c3d37c819544da2);
81    c->h[1] = U64(0x73e1996689dcd4d6);
82    c->h[2] = U64(0x1dfab7ae32ff9c82);
83    c->h[3] = U64(0x679dd514582f9fcf);
84    c->h[4] = U64(0x0f6d2b697bd44da8);
85    c->h[5] = U64(0x77e36f7304c48942);
86    c->h[6] = U64(0x3f9d85a86a1d36c8);
87    c->h[7] = U64(0x1112e6ad91d692a1);
88
89    c->Nl = 0;
90    c->Nh = 0;
91    c->num = 0;
92    c->md_len = SHA224_DIGEST_LENGTH;
93    return 1;
94}
95
96int sha512_256_init(SHA512_CTX *c)
97{
98    c->h[0] = U64(0x22312194fc2bf72c);
99    c->h[1] = U64(0x9f555fa3c84c64c2);
100    c->h[2] = U64(0x2393b86b6f53b151);
101    c->h[3] = U64(0x963877195940eabd);
102    c->h[4] = U64(0x96283ee2a88effe3);
103    c->h[5] = U64(0xbe5e1e2553863992);
104    c->h[6] = U64(0x2b0199fc2c85b8aa);
105    c->h[7] = U64(0x0eb72ddc81c52ca2);
106
107    c->Nl = 0;
108    c->Nh = 0;
109    c->num = 0;
110    c->md_len = SHA256_DIGEST_LENGTH;
111    return 1;
112}
113
114int SHA384_Init(SHA512_CTX *c)
115{
116    c->h[0] = U64(0xcbbb9d5dc1059ed8);
117    c->h[1] = U64(0x629a292a367cd507);
118    c->h[2] = U64(0x9159015a3070dd17);
119    c->h[3] = U64(0x152fecd8f70e5939);
120    c->h[4] = U64(0x67332667ffc00b31);
121    c->h[5] = U64(0x8eb44a8768581511);
122    c->h[6] = U64(0xdb0c2e0d64f98fa7);
123    c->h[7] = U64(0x47b5481dbefa4fa4);
124
125    c->Nl = 0;
126    c->Nh = 0;
127    c->num = 0;
128    c->md_len = SHA384_DIGEST_LENGTH;
129    return 1;
130}
131
132int SHA512_Init(SHA512_CTX *c)
133{
134    c->h[0] = U64(0x6a09e667f3bcc908);
135    c->h[1] = U64(0xbb67ae8584caa73b);
136    c->h[2] = U64(0x3c6ef372fe94f82b);
137    c->h[3] = U64(0xa54ff53a5f1d36f1);
138    c->h[4] = U64(0x510e527fade682d1);
139    c->h[5] = U64(0x9b05688c2b3e6c1f);
140    c->h[6] = U64(0x1f83d9abfb41bd6b);
141    c->h[7] = U64(0x5be0cd19137e2179);
142
143    c->Nl = 0;
144    c->Nh = 0;
145    c->num = 0;
146    c->md_len = SHA512_DIGEST_LENGTH;
147    return 1;
148}
149
150#ifndef SHA512_ASM
151static
152#endif
153void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
154
155int SHA512_Final(unsigned char *md, SHA512_CTX *c)
156{
157    unsigned char *p = (unsigned char *)c->u.p;
158    size_t n = c->num;
159
160    p[n] = 0x80;                /* There always is a room for one */
161    n++;
162    if (n > (sizeof(c->u) - 16)) {
163        memset(p + n, 0, sizeof(c->u) - n);
164        n = 0;
165        sha512_block_data_order(c, p, 1);
166    }
167
168    memset(p + n, 0, sizeof(c->u) - 16 - n);
169#ifdef  B_ENDIAN
170    c->u.d[SHA_LBLOCK - 2] = c->Nh;
171    c->u.d[SHA_LBLOCK - 1] = c->Nl;
172#else
173    p[sizeof(c->u) - 1] = (unsigned char)(c->Nl);
174    p[sizeof(c->u) - 2] = (unsigned char)(c->Nl >> 8);
175    p[sizeof(c->u) - 3] = (unsigned char)(c->Nl >> 16);
176    p[sizeof(c->u) - 4] = (unsigned char)(c->Nl >> 24);
177    p[sizeof(c->u) - 5] = (unsigned char)(c->Nl >> 32);
178    p[sizeof(c->u) - 6] = (unsigned char)(c->Nl >> 40);
179    p[sizeof(c->u) - 7] = (unsigned char)(c->Nl >> 48);
180    p[sizeof(c->u) - 8] = (unsigned char)(c->Nl >> 56);
181    p[sizeof(c->u) - 9] = (unsigned char)(c->Nh);
182    p[sizeof(c->u) - 10] = (unsigned char)(c->Nh >> 8);
183    p[sizeof(c->u) - 11] = (unsigned char)(c->Nh >> 16);
184    p[sizeof(c->u) - 12] = (unsigned char)(c->Nh >> 24);
185    p[sizeof(c->u) - 13] = (unsigned char)(c->Nh >> 32);
186    p[sizeof(c->u) - 14] = (unsigned char)(c->Nh >> 40);
187    p[sizeof(c->u) - 15] = (unsigned char)(c->Nh >> 48);
188    p[sizeof(c->u) - 16] = (unsigned char)(c->Nh >> 56);
189#endif
190
191    sha512_block_data_order(c, p, 1);
192
193    if (md == 0)
194        return 0;
195
196    switch (c->md_len) {
197    /* Let compiler decide if it's appropriate to unroll... */
198    case SHA224_DIGEST_LENGTH:
199        for (n = 0; n < SHA224_DIGEST_LENGTH / 8; n++) {
200            SHA_LONG64 t = c->h[n];
201
202            *(md++) = (unsigned char)(t >> 56);
203            *(md++) = (unsigned char)(t >> 48);
204            *(md++) = (unsigned char)(t >> 40);
205            *(md++) = (unsigned char)(t >> 32);
206            *(md++) = (unsigned char)(t >> 24);
207            *(md++) = (unsigned char)(t >> 16);
208            *(md++) = (unsigned char)(t >> 8);
209            *(md++) = (unsigned char)(t);
210        }
211        /*
212         * For 224 bits, there are four bytes left over that have to be
213         * processed separately.
214         */
215        {
216            SHA_LONG64 t = c->h[SHA224_DIGEST_LENGTH / 8];
217
218            *(md++) = (unsigned char)(t >> 56);
219            *(md++) = (unsigned char)(t >> 48);
220            *(md++) = (unsigned char)(t >> 40);
221            *(md++) = (unsigned char)(t >> 32);
222        }
223        break;
224    case SHA256_DIGEST_LENGTH:
225        for (n = 0; n < SHA256_DIGEST_LENGTH / 8; n++) {
226            SHA_LONG64 t = c->h[n];
227
228            *(md++) = (unsigned char)(t >> 56);
229            *(md++) = (unsigned char)(t >> 48);
230            *(md++) = (unsigned char)(t >> 40);
231            *(md++) = (unsigned char)(t >> 32);
232            *(md++) = (unsigned char)(t >> 24);
233            *(md++) = (unsigned char)(t >> 16);
234            *(md++) = (unsigned char)(t >> 8);
235            *(md++) = (unsigned char)(t);
236        }
237        break;
238    case SHA384_DIGEST_LENGTH:
239        for (n = 0; n < SHA384_DIGEST_LENGTH / 8; n++) {
240            SHA_LONG64 t = c->h[n];
241
242            *(md++) = (unsigned char)(t >> 56);
243            *(md++) = (unsigned char)(t >> 48);
244            *(md++) = (unsigned char)(t >> 40);
245            *(md++) = (unsigned char)(t >> 32);
246            *(md++) = (unsigned char)(t >> 24);
247            *(md++) = (unsigned char)(t >> 16);
248            *(md++) = (unsigned char)(t >> 8);
249            *(md++) = (unsigned char)(t);
250        }
251        break;
252    case SHA512_DIGEST_LENGTH:
253        for (n = 0; n < SHA512_DIGEST_LENGTH / 8; n++) {
254            SHA_LONG64 t = c->h[n];
255
256            *(md++) = (unsigned char)(t >> 56);
257            *(md++) = (unsigned char)(t >> 48);
258            *(md++) = (unsigned char)(t >> 40);
259            *(md++) = (unsigned char)(t >> 32);
260            *(md++) = (unsigned char)(t >> 24);
261            *(md++) = (unsigned char)(t >> 16);
262            *(md++) = (unsigned char)(t >> 8);
263            *(md++) = (unsigned char)(t);
264        }
265        break;
266    /* ... as well as make sure md_len is not abused. */
267    default:
268        return 0;
269    }
270
271    return 1;
272}
273
/*
 * SHA-384 shares SHA-512's finalisation: c->md_len (set by SHA384_Init)
 * makes SHA512_Final emit only SHA384_DIGEST_LENGTH digest bytes.
 */
int SHA384_Final(unsigned char *md, SHA512_CTX *c)
{
    return SHA512_Final(md, c);
}
278
/*
 * Absorb len bytes at _data into the hash.  Partial input is collected
 * in the 128-byte context buffer c->u; whole blocks are compressed via
 * sha512_block_data_order().  Always returns 1.
 */
int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
{
    SHA_LONG64 l;
    unsigned char *p = c->u.p;
    const unsigned char *data = (const unsigned char *)_data;

    if (len == 0)
        return 1;

    /* Advance the 128-bit message bit counter Nh:Nl by len*8. */
    l = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
    if (l < c->Nl)
        c->Nh++;                /* low word wrapped: carry into Nh */
    if (sizeof(len) >= 8)
        c->Nh += (((SHA_LONG64) len) >> 61);    /* bits shifted out of Nl */
    c->Nl = l;

    /* First top up (and possibly flush) a partially filled block. */
    if (c->num != 0) {
        size_t n = sizeof(c->u) - c->num;

        if (len < n) {
            memcpy(p + c->num, data, len), c->num += (unsigned int)len;
            return 1;
        } else {
            memcpy(p + c->num, data, n), c->num = 0;
            len -= n, data += n;
            sha512_block_data_order(c, p, 1);
        }
    }

    /* Compress as many whole 128-byte blocks as possible. */
    if (len >= sizeof(c->u)) {
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
        /*
         * The transform reads 64-bit words; on strict-alignment targets
         * misaligned input must be staged through the aligned buffer.
         */
        if ((size_t)data % sizeof(c->u.d[0]) != 0)
            while (len >= sizeof(c->u))
                memcpy(p, data, sizeof(c->u)),
                sha512_block_data_order(c, p, 1),
                len -= sizeof(c->u), data += sizeof(c->u);
        else
#endif
            sha512_block_data_order(c, data, len / sizeof(c->u)),
            data += len, len %= sizeof(c->u), data -= len;
    }

    /* Stash the tail (< 128 bytes) for the next Update/Final call. */
    if (len != 0)
        memcpy(p, data, len), c->num = (int)len;

    return 1;
}
326
327int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
328{
329    return SHA512_Update(c, data, len);
330}
331
332void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
333{
334#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
335    if ((size_t)data % sizeof(c->u.d[0]) != 0)
336        memcpy(c->u.p, data, sizeof(c->u.p)), data = c->u.p;
337#endif
338    sha512_block_data_order(c, data, 1);
339}
340
341#ifndef SHA512_ASM
/*
 * SHA-384/SHA-512 round constants, FIPS 180-4 section 4.2.3: the first
 * 64 bits of the fractional parts of the cube roots of the first 80
 * prime numbers.
 */
static const SHA_LONG64 K512[80] = {
    U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
    U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
    U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
    U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
    U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
    U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
    U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
    U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
    U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
    U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
    U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
    U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
    U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
    U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
    U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
    U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
    U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
    U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
    U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
    U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
    U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
    U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
    U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
    U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
    U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
    U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
    U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
    U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
    U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
    U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
    U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
    U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
    U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
    U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
    U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
    U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
    U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
    U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
    U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
    U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
};
384
/*
 * Platform-specific single-instruction versions of ROTR (64-bit rotate
 * right) and PULL64 (big-endian load of a 64-bit word).  Portable
 * fallbacks are defined below for anything not matched here.
 */
# ifndef PEDANTIC
#  if defined(__GNUC__) && __GNUC__>=2 && \
      !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
/* GCC-compatible compilers: GNU extended inline asm. */
#   if defined(__x86_64) || defined(__x86_64__)
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
                                asm ("rorq %1,%0"       \
                                : "=r"(ret)             \
                                : "J"(n),"0"(a)         \
                                : "cc"); ret;           })
#    if !defined(B_ENDIAN)
#     define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
                                asm ("bswapq    %0"             \
                                : "=r"(ret)                     \
                                : "0"(ret)); ret;               })
#    endif
#   elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
/* 32-bit x86: byte-swap the two 32-bit halves and reassemble. */
#    if defined(I386_ONLY)
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                          unsigned int hi=p[0],lo=p[1];          \
                                asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
                                    "roll $16,%%eax; roll $16,%%edx; "\
                                    "xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
                                : "=a"(lo),"=d"(hi)             \
                                : "0"(lo),"1"(hi) : "cc");      \
                                ((SHA_LONG64)hi)<<32|lo;        })
#    else
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                          unsigned int hi=p[0],lo=p[1];         \
                                asm ("bswapl %0; bswapl %1;"    \
                                : "=r"(lo),"=r"(hi)             \
                                : "0"(lo),"1"(hi));             \
                                ((SHA_LONG64)hi)<<32|lo;        })
#    endif
#   elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
                                asm ("rotrdi %0,%1,%2"  \
                                : "=r"(ret)             \
                                : "r"(a),"K"(n)); ret;  })
#   elif defined(__aarch64__)
#    define ROTR(a,n)    ({ SHA_LONG64 ret;             \
                                asm ("ror %0,%1,%2"     \
                                : "=r"(ret)             \
                                : "r"(a),"I"(n)); ret;  })
#    if  defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
        __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
#     define PULL64(x)   ({ SHA_LONG64 ret;                     \
                                asm ("rev       %0,%1"          \
                                : "=r"(ret)                     \
                                : "r"(*((const SHA_LONG64 *)(&(x))))); ret; })
#    endif
#   endif
#  elif defined(_MSC_VER)
#   if defined(_WIN64)         /* applies to both IA-64 and AMD64 */
#    pragma intrinsic(_rotr64)
#    define ROTR(a,n)    _rotr64((a),n)
#   endif
#   if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && \
       !defined(OPENSSL_NO_INLINE_ASM)
/*
 * MSVC 32-bit inline asm: the 64-bit return value is left in EDX:EAX,
 * which is the compiler's return convention, hence no return statement.
 * x arrives in ECX under __fastcall.
 */
#    if defined(I386_ONLY)
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov  edx,[ecx + 0]
    _asm mov  eax,[ecx + 4]
    _asm xchg dh, dl
    _asm xchg ah, al
    _asm rol  edx, 16
    _asm rol  eax, 16
    _asm xchg dh, dl
    _asm xchg ah, al
}
#    else
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov   edx,[ecx + 0]
    _asm mov   eax,[ecx + 4]
    _asm bswap edx
    _asm bswap eax
}
#    endif
#    define PULL64(x) __pull64be(&(x))
#   endif
#  endif
# endif
# ifndef PULL64
/*
 * Generic big-endian load of a 64-bit word, one byte at a time; used
 * whenever no faster platform-specific PULL64 was defined above.
 * Arguments are fully parenthesized so expression operands expand
 * correctly.
 */
#  define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
#  define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
# endif
# ifndef ROTR
/*
 * Portable 64-bit rotate right.  The shift count is parenthesized so
 * that ROTR(x, a+b) does not expand to a wrong (and possibly undefined,
 * >= 64) shift amount.
 */
#  define ROTR(x,s)       (((x)>>(s)) | (x)<<(64-(s)))
# endif
/* FIPS 180-4 section 4.1.3: the SHA-384/SHA-512 logical functions. */
# define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
# define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
# define sigma0(x)       (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
# define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
# define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
# define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
481
482# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
483/*
484 * This code should give better results on 32-bit CPU with less than
485 * ~24 registers, both size and performance wise...
486 */
487
/*
 * SHA-512 compression function tuned for register-starved 32-bit CPUs.
 * Instead of eight named working variables it keeps the round state in a
 * window F[] sliding down the X[] array (F-- per round), so most of the
 * state lives in memory; only A, E and T are kept "hot".  The same array
 * holds the message schedule: at round i, W[i-j] is found at F[8+j].
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *W = in;
    SHA_LONG64 A, E, T;
    SHA_LONG64 X[9 + 80], *F;
    int i;

    while (num--) {

        /* Load chain state into the window top; A and E stay in locals. */
        F = X + 80;
        A = ctx->h[0];
        F[1] = ctx->h[1];
        F[2] = ctx->h[2];
        F[3] = ctx->h[3];
        E = ctx->h[4];
        F[5] = ctx->h[5];
        F[6] = ctx->h[6];
        F[7] = ctx->h[7];

        /* Rounds 0..15: read the input block (byte-swapping on LE). */
        for (i = 0; i < 16; i++, F--) {
#  ifdef B_ENDIAN
            T = W[i];
#  else
            T = PULL64(W[i]);
#  endif
            F[0] = A;
            F[4] = E;
            F[8] = T;           /* schedule word W[i] */
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
            E = F[3] + T;
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
        }

        /*
         * Rounds 16..79: schedule expansion
         * W[i] = sigma0(W[i-15]) + sigma1(W[i-2]) + W[i-16] + W[i-7],
         * with W[i-j] located at F[8+j] in the sliding window.
         */
        for (; i < 80; i++, F--) {
            T = sigma0(F[8 + 16 - 1]);
            T += sigma1(F[8 + 16 - 14]);
            T += F[8 + 16] + F[8 + 16 - 9];

            F[0] = A;
            F[4] = E;
            F[8] = T;
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
            E = F[3] + T;
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
        }

        /* Fold the block's result back into the chaining value. */
        ctx->h[0] += A;
        ctx->h[1] += F[1];
        ctx->h[2] += F[2];
        ctx->h[3] += F[3];
        ctx->h[4] += E;
        ctx->h[5] += F[5];
        ctx->h[6] += F[6];
        ctx->h[7] += F[7];

        W += SHA_LBLOCK;
    }
}
547
548# elif defined(OPENSSL_SMALL_FOOTPRINT)
549
/*
 * Compact SHA-512 compression function: one loop iteration per round,
 * the message schedule kept in a 16-word circular buffer X[] (indices
 * taken mod 16), minimising code size at some speed cost.
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *W = in;
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1, T2;
    SHA_LONG64 X[16];
    int i;

    while (num--) {

        a = ctx->h[0];
        b = ctx->h[1];
        c = ctx->h[2];
        d = ctx->h[3];
        e = ctx->h[4];
        f = ctx->h[5];
        g = ctx->h[6];
        h = ctx->h[7];

        /* Rounds 0..15: load the block (byte-swapping on little-endian). */
        for (i = 0; i < 16; i++) {
#  ifdef B_ENDIAN
            T1 = X[i] = W[i];
#  else
            T1 = X[i] = PULL64(W[i]);
#  endif
            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
            T2 = Sigma0(a) + Maj(a, b, c);
            h = g;
            g = f;
            f = e;
            e = d + T1;
            d = c;
            c = b;
            b = a;
            a = T1 + T2;
        }

        /* Rounds 16..79: expand the schedule in place in X[] mod 16. */
        for (; i < 80; i++) {
            s0 = X[(i + 1) & 0x0f];     /* W[i-15] */
            s0 = sigma0(s0);
            s1 = X[(i + 14) & 0x0f];    /* W[i-2] */
            s1 = sigma1(s1);

            T1 = X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
            T2 = Sigma0(a) + Maj(a, b, c);
            h = g;
            g = f;
            f = e;
            e = d + T1;
            d = c;
            c = b;
            b = a;
            a = T1 + T2;
        }

        /* Fold the block's result back into the chaining value. */
        ctx->h[0] += a;
        ctx->h[1] += b;
        ctx->h[2] += c;
        ctx->h[3] += d;
        ctx->h[4] += e;
        ctx->h[5] += f;
        ctx->h[6] += g;
        ctx->h[7] += h;

        W += SHA_LBLOCK;
    }
}
618
619# else
/*
 * One SHA-512 round for i in [0,15].  T1 must already hold the message
 * word W[i]; the "rotation" of the eight working variables is done by
 * the caller permuting the argument names between invocations.
 */
#  define ROUND_00_15(i,a,b,c,d,e,f,g,h)        do {    \
        T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
        h = Sigma0(a) + Maj(a,b,c);                     \
        d += T1;        h += T1;                        } while (0)

/*
 * One SHA-512 round for i+j in [16,79]: first extends the message
 * schedule in place in the 16-word circular buffer X, then performs the
 * common round body above.
 */
#  define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)    do {    \
        s0 = X[(j+1)&0x0f];     s0 = sigma0(s0);        \
        s1 = X[(j+14)&0x0f];    s1 = sigma1(s1);        \
        T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];    \
        ROUND_00_15(i+j,a,b,c,d,e,f,g,h);               } while (0)
630
/*
 * Default SHA-512 compression function: fully unrolled first 16 rounds
 * and a 16-round-unrolled main loop.  Variable "rotation" is expressed
 * by permuting the a..h argument order passed to the ROUND macros, so no
 * shuffling assignments are needed.
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *W = in;
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
    SHA_LONG64 X[16];
    int i;

    while (num--) {

        a = ctx->h[0];
        b = ctx->h[1];
        c = ctx->h[2];
        d = ctx->h[3];
        e = ctx->h[4];
        f = ctx->h[5];
        g = ctx->h[6];
        h = ctx->h[7];

        /* Rounds 0..15: load the block into X[] (byte-swapping on LE). */
#  ifdef B_ENDIAN
        T1 = X[0] = W[0];
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
        T1 = X[1] = W[1];
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
        T1 = X[2] = W[2];
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
        T1 = X[3] = W[3];
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
        T1 = X[4] = W[4];
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
        T1 = X[5] = W[5];
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
        T1 = X[6] = W[6];
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
        T1 = X[7] = W[7];
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
        T1 = X[8] = W[8];
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
        T1 = X[9] = W[9];
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
        T1 = X[10] = W[10];
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
        T1 = X[11] = W[11];
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
        T1 = X[12] = W[12];
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
        T1 = X[13] = W[13];
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
        T1 = X[14] = W[14];
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
        T1 = X[15] = W[15];
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
#  else
        T1 = X[0] = PULL64(W[0]);
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
        T1 = X[1] = PULL64(W[1]);
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
        T1 = X[2] = PULL64(W[2]);
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
        T1 = X[3] = PULL64(W[3]);
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
        T1 = X[4] = PULL64(W[4]);
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
        T1 = X[5] = PULL64(W[5]);
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
        T1 = X[6] = PULL64(W[6]);
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
        T1 = X[7] = PULL64(W[7]);
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
        T1 = X[8] = PULL64(W[8]);
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
        T1 = X[9] = PULL64(W[9]);
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
        T1 = X[10] = PULL64(W[10]);
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
        T1 = X[11] = PULL64(W[11]);
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
        T1 = X[12] = PULL64(W[12]);
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
        T1 = X[13] = PULL64(W[13]);
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
        T1 = X[14] = PULL64(W[14]);
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
        T1 = X[15] = PULL64(W[15]);
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
#  endif

        /* Rounds 16..79, sixteen at a time; X[] is updated in place. */
        for (i = 16; i < 80; i += 16) {
            ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
            ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
            ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
            ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
            ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
            ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
            ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
            ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
            ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
            ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
            ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
            ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
            ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
            ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
            ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
            ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
        }

        /* Fold the block's result back into the chaining value. */
        ctx->h[0] += a;
        ctx->h[1] += b;
        ctx->h[2] += c;
        ctx->h[3] += d;
        ctx->h[4] += e;
        ctx->h[5] += f;
        ctx->h[6] += g;
        ctx->h[7] += h;

        W += SHA_LBLOCK;
    }
}
749
750# endif
751
752#endif                         /* SHA512_ASM */
753