/* sha512.c revision 1.21 */
/* $OpenBSD: sha512.c,v 1.21 2023/03/27 10:13:08 jsing Exp $ */
/* ====================================================================
 * Copyright (c) 1998-2011 The OpenSSL Project.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    openssl-core@openssl.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ====================================================================
 *
 * This product includes cryptographic software written by Eric Young
 * (eay@cryptsoft.com).  This product includes software written by Tim
 * Hudson (tjh@cryptsoft.com).
 */

#include <endian.h>
#include <stdlib.h>
#include <string.h>

#include <openssl/opensslconf.h>

#include <openssl/crypto.h>
#include <openssl/sha.h>

#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
/*
 * IMPLEMENTATION NOTES.
 *
 * As you might have noticed 32-bit hash algorithms:
 *
 * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
 * - optimized versions implement two transform functions: one operating
 *   on [aligned] data in host byte order and one - on data in input
 *   stream byte order;
 * - share common byte-order neutral collector and padding function
 *   implementations, ../md32_common.h;
 *
 * Neither of the above applies to this SHA-512 implementations. Reasons
 * [in reverse order] are:
 *
 * - it's the only 64-bit hash algorithm for the moment of this writing,
 *   there is no need for common collector/padding implementation [yet];
 * - by supporting only one transform function [which operates on
 *   *aligned* data in input stream byte order, big-endian in this case]
 *   we minimize burden of maintenance in two ways: a) collector/padding
 *   function is simpler; b) only one transform function to stare at;
 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
 *   apply a number of optimizations to mitigate potential performance
 *   penalties caused by previous design decision;
 *
 * Caveat lector.
 *
 * Implementation relies on the fact that "long long" is 64-bit on
 * both 32- and 64-bit platforms. If some compiler vendor comes up
 * with 128-bit long long, adjustment to sha.h would be required.
 * As this implementation relies on 64-bit integer type, it's totally
 * inappropriate for platforms which don't support it, most notably
 * 16-bit platforms.
 * <appro@fy.chalmers.se>
 */

/*
 * When the target either tolerates unaligned loads or an assembly
 * implementation is supplied, the block function may be fed input
 * buffers directly without copying into the aligned context buffer.
 */
#if !defined(__STRICT_ALIGNMENT) || defined(SHA512_ASM)
#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
#endif

/*
 * Initialize *c for SHA-384: the SHA-384 initial hash values, zeroed
 * bit counters, and md_len recording the (truncated) digest size that
 * SHA512_Final() will emit.  Always returns 1.
 */
int
SHA384_Init(SHA512_CTX *c)
{
	c->h[0] = U64(0xcbbb9d5dc1059ed8);
	c->h[1] = U64(0x629a292a367cd507);
	c->h[2] = U64(0x9159015a3070dd17);
	c->h[3] = U64(0x152fecd8f70e5939);
	c->h[4] = U64(0x67332667ffc00b31);
	c->h[5] = U64(0x8eb44a8768581511);
	c->h[6] = U64(0xdb0c2e0d64f98fa7);
	c->h[7] = U64(0x47b5481dbefa4fa4);

	c->Nl = 0;
	c->Nh = 0;
	c->num = 0;
	c->md_len = SHA384_DIGEST_LENGTH;
	return 1;
}

/*
 * Initialize *c for SHA-512 with the SHA-512 initial hash values.
 * Always returns 1.
 */
int
SHA512_Init(SHA512_CTX *c)
{
	c->h[0] = U64(0x6a09e667f3bcc908);
	c->h[1] = U64(0xbb67ae8584caa73b);
	c->h[2] = U64(0x3c6ef372fe94f82b);
	c->h[3] = U64(0xa54ff53a5f1d36f1);
	c->h[4] = U64(0x510e527fade682d1);
	c->h[5] = U64(0x9b05688c2b3e6c1f);
	c->h[6] = U64(0x1f83d9abfb41bd6b);
	c->h[7] = U64(0x5be0cd19137e2179);

	c->Nl = 0;
	c->Nh = 0;
	c->num = 0;
	c->md_len = SHA512_DIGEST_LENGTH;
	return 1;
}

/*
 * Core compression function: processes num 128-byte blocks of big-endian
 * input.  Defined below (one of three C variants) unless an assembly
 * implementation provides it, in which case it must not be static.
 */
#ifndef SHA512_ASM
static
#endif
void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num);

/*
 * Finalize the hash: append the 0x80 pad byte, zero-fill, encode the
 * 128-bit bit count big-endian into the last 16 bytes of the block
 * (spilling into an extra block when fewer than 16 bytes remain), run
 * the final transform(s), and serialize c->h big-endian into md.
 * Returns 1 on success, 0 if md is NULL or md_len is not a recognized
 * digest length.  The output length is governed by c->md_len, which is
 * how SHA384_Final() shares this code.
 */
int
SHA512_Final(unsigned char *md, SHA512_CTX *c)
{
	unsigned char *p = (unsigned char *)c->u.p;
	size_t n = c->num;

	p[n] = 0x80;	/* There always is a room for one */
	n++;
	if (n > (sizeof(c->u) - 16)) {
		/* No room for the 16-byte length field: pad out and
		 * compress this block, then start an all-zero one. */
		memset(p + n, 0, sizeof(c->u) - n);
		n = 0;
		sha512_block_data_order(c, p, 1);
	}

	memset(p + n, 0, sizeof(c->u) - 16 - n);
#if BYTE_ORDER == BIG_ENDIAN
	c->u.d[SHA_LBLOCK - 2] = c->Nh;
	c->u.d[SHA_LBLOCK - 1] = c->Nl;
#else
	/* Store Nh:Nl (total bit count) big-endian, byte by byte. */
	p[sizeof(c->u) - 1] = (unsigned char)(c->Nl);
	p[sizeof(c->u) - 2] = (unsigned char)(c->Nl >> 8);
	p[sizeof(c->u) - 3] = (unsigned char)(c->Nl >> 16);
	p[sizeof(c->u) - 4] = (unsigned char)(c->Nl >> 24);
	p[sizeof(c->u) - 5] = (unsigned char)(c->Nl >> 32);
	p[sizeof(c->u) - 6] = (unsigned char)(c->Nl >> 40);
	p[sizeof(c->u) - 7] = (unsigned char)(c->Nl >> 48);
	p[sizeof(c->u) - 8] = (unsigned char)(c->Nl >> 56);
	p[sizeof(c->u) - 9] = (unsigned char)(c->Nh);
	p[sizeof(c->u) - 10] = (unsigned char)(c->Nh >> 8);
	p[sizeof(c->u) - 11] = (unsigned char)(c->Nh >> 16);
	p[sizeof(c->u) - 12] = (unsigned char)(c->Nh >> 24);
	p[sizeof(c->u) - 13] = (unsigned char)(c->Nh >> 32);
	p[sizeof(c->u) - 14] = (unsigned char)(c->Nh >> 40);
	p[sizeof(c->u) - 15] = (unsigned char)(c->Nh >> 48);
	p[sizeof(c->u) - 16] = (unsigned char)(c->Nh >> 56);
#endif

	sha512_block_data_order(c, p, 1);

	if (md == 0)
		return 0;

	switch (c->md_len) {
	/* Let compiler decide if it's appropriate to unroll... */
	case SHA384_DIGEST_LENGTH:
		for (n = 0; n < SHA384_DIGEST_LENGTH/8; n++) {
			SHA_LONG64 t = c->h[n];

			*(md++) = (unsigned char)(t >> 56);
			*(md++) = (unsigned char)(t >> 48);
			*(md++) = (unsigned char)(t >> 40);
			*(md++) = (unsigned char)(t >> 32);
			*(md++) = (unsigned char)(t >> 24);
			*(md++) = (unsigned char)(t >> 16);
			*(md++) = (unsigned char)(t >> 8);
			*(md++) = (unsigned char)(t);
		}
		break;
	case SHA512_DIGEST_LENGTH:
		for (n = 0; n < SHA512_DIGEST_LENGTH/8; n++) {
			SHA_LONG64 t = c->h[n];

			*(md++) = (unsigned char)(t >> 56);
			*(md++) = (unsigned char)(t >> 48);
			*(md++) = (unsigned char)(t >> 40);
			*(md++) = (unsigned char)(t >> 32);
			*(md++) = (unsigned char)(t >> 24);
			*(md++) = (unsigned char)(t >> 16);
			*(md++) = (unsigned char)(t >> 8);
			*(md++) = (unsigned char)(t);
		}
		break;
	/* ... as well as make sure md_len is not abused. */
	default:
		return 0;
	}

	return 1;
}

/* SHA-384 finalization is SHA-512's, truncated via c->md_len. */
int
SHA384_Final(unsigned char *md, SHA512_CTX *c)
{
	return SHA512_Final(md, c);
}

/*
 * Absorb len bytes of _data into the hash state: update the 128-bit
 * bit counter (Nh:Nl), top up and flush any partially filled context
 * buffer, process whole 128-byte blocks directly from the input (via
 * a bounce buffer when the platform requires alignment and data is
 * misaligned), and stash the remaining tail in c->u.p.  Returns 1.
 */
int
SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
{
	SHA_LONG64 l;
	unsigned char *p = c->u.p;
	const unsigned char *data = (const unsigned char *)_data;

	if (len == 0)
		return 1;

	/* len*8 low bits; carry into Nh on wrap, plus the bits of len
	 * shifted out of the 64-bit low word (len >> 61). */
	l = (c->Nl + (((SHA_LONG64)len) << 3)) & U64(0xffffffffffffffff);
	if (l < c->Nl)
		c->Nh++;
	if (sizeof(len) >= 8)
		c->Nh += (((SHA_LONG64)len) >> 61);
	c->Nl = l;

	if (c->num != 0) {
		size_t n = sizeof(c->u) - c->num;

		if (len < n) {
			memcpy(p + c->num, data, len);
			c->num += (unsigned int)len;
			return 1;
		} else{
			memcpy(p + c->num, data, n);
			c->num = 0;
			len -= n;
			data += n;
			sha512_block_data_order(c, p, 1);
		}
	}

	if (len >= sizeof(c->u)) {
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
		if ((size_t)data % sizeof(c->u.d[0]) != 0) {
			/* Misaligned input on a strict-alignment target:
			 * copy block by block through the context buffer. */
			while (len >= sizeof(c->u)) {
				memcpy(p, data, sizeof(c->u));
				sha512_block_data_order(c, p, 1);
				len -= sizeof(c->u);
				data += sizeof(c->u);
			}
		} else
#endif
		{
			sha512_block_data_order(c, data, len/sizeof(c->u));
			data += len;
			len %= sizeof(c->u);
			data -= len;
		}
	}

	if (len != 0) {
		memcpy(p, data, len);
		c->num = (int)len;
	}

	return 1;
}

/* SHA-384 uses the SHA-512 collector unchanged. */
int
SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
{
	return SHA512_Update(c, data, len);
}

/*
 * Compress one caller-supplied 128-byte block into the state, copying
 * through the context buffer first if alignment demands it.
 */
void
SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
{
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
	if ((size_t)data % sizeof(c->u.d[0]) != 0)
		memcpy(c->u.p, data, sizeof(c->u.p)),
		data = c->u.p;
#endif
	sha512_block_data_order(c, data, 1);
}

/*
 * One-shot SHA-384 of d[0..n).  If md is NULL the digest is written to
 * an internal static buffer (not thread-safe) and that is returned.
 * The context is wiped with explicit_bzero() before returning.
 */
unsigned char *
SHA384(const unsigned char *d, size_t n, unsigned char *md)
{
	SHA512_CTX c;
	static unsigned char m[SHA384_DIGEST_LENGTH];

	if (md == NULL)
		md = m;

	SHA384_Init(&c);
	SHA512_Update(&c, d, n);
	SHA512_Final(md, &c);

	explicit_bzero(&c, sizeof(c));

	return (md);
}

/*
 * One-shot SHA-512 of d[0..n); same static-buffer and wiping behavior
 * as SHA384() above.
 */
unsigned char *
SHA512(const unsigned char *d, size_t n, unsigned char *md)
{
	SHA512_CTX c;
	static unsigned char m[SHA512_DIGEST_LENGTH];

	if (md == NULL)
		md = m;

	SHA512_Init(&c);
	SHA512_Update(&c, d, n);
	SHA512_Final(md, &c);

	explicit_bzero(&c, sizeof(c));

	return (md);
}

#ifndef SHA512_ASM
/* The 80 SHA-512 round constants K512. */
static const SHA_LONG64 K512[80] = {
	U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
	U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
	U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
	U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
	U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
	U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
	U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
	U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
	U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
	U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
	U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
	U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
	U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
	U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
	U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
	U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
	U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
	U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
	U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
	U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
	U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
	U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
	U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
	U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
	U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
	U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
	U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
	U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
	U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
	U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
	U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
	U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
	U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
	U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
	U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
	U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
	U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
	U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
	U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
	U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817),
};

/*
 * Inline-asm fast paths: ROTR via native rotate instructions and
 * PULL64 (big-endian 64-bit load) via byte-swap, where available.
 */
#if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
# if defined(__x86_64) || defined(__x86_64__)
# define ROTR(a,n)	({ SHA_LONG64 ret;		\
				asm ("rorq %1,%0"	\
				: "=r"(ret)		\
				: "J"(n),"0"(a)		\
				: "cc"); ret; })
# define PULL64(x)	({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));	\
				asm ("bswapq %0"	\
				: "=r"(ret)		\
				: "0"(ret)); ret; })
# elif (defined(__i386) || defined(__i386__))
# define PULL64(x)	({ const unsigned int *p=(const unsigned int *)(&(x));\
			 unsigned int hi=p[0],lo=p[1];		\
				asm ("bswapl %0; bswapl %1;"	\
				: "=r"(lo),"=r"(hi)		\
				: "0"(lo),"1"(hi));		\
				((SHA_LONG64)hi)<<32|lo; })
# elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
# define ROTR(a,n)	({ SHA_LONG64 ret;		\
				asm ("rotrdi %0,%1,%2"	\
				: "=r"(ret)		\
				: "r"(a),"K"(n)); ret; })
# endif
#endif

/* Portable fallback: assemble the big-endian 64-bit word byte by byte. */
#ifndef PULL64
#define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8))
#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
#endif

#ifndef ROTR
#define ROTR(x,s)	(((x)>>s) | (x)<<(64-s))
#endif

/* SHA-512 round functions (FIPS 180-4 notation). */
#define Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
#define Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
#define sigma0(x)	(ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
#define sigma1(x)	(ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))

#define Ch(x,y,z)	(((x) & (y)) ^ ((~(x)) & (z)))
#define Maj(x,y,z)	(((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))


#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
/*
 * This code should give better results on 32-bit CPU with less than
 * ~24 registers, both size and performance wise...
 *
 * Instead of rotating eight working variables each round, the sliding
 * window F walks down X[] so that F[1..3]/F[5..7] are the "renamed"
 * b,c,d/f,g,h and F[8+16-k] holds W[i-k] for the message schedule.
 */
static void
sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num)
{
	const SHA_LONG64 *W = in;
	SHA_LONG64 A, E, T;
	SHA_LONG64 X[9 + 80], *F;
	int i;

	while (num--) {

		F = X + 80;
		A = ctx->h[0];
		F[1] = ctx->h[1];
		F[2] = ctx->h[2];
		F[3] = ctx->h[3];
		E = ctx->h[4];
		F[5] = ctx->h[5];
		F[6] = ctx->h[6];
		F[7] = ctx->h[7];

		for (i = 0; i < 16; i++, F--) {
			T = PULL64(W[i]);
			F[0] = A;
			F[4] = E;
			F[8] = T;
			T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
			E = F[3] + T;
			A = T + Sigma0(A) + Maj(A, F[1], F[2]);
		}

		for (; i < 80; i++, F--) {
			/* W[i] = sigma1(W[i-2]) + W[i-7] + sigma0(W[i-15]) + W[i-16] */
			T = sigma0(F[8 + 16 - 1]);
			T += sigma1(F[8 + 16 - 14]);
			T += F[8 + 16] + F[8 + 16 - 9];

			F[0] = A;
			F[4] = E;
			F[8] = T;
			T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
			E = F[3] + T;
			A = T + Sigma0(A) + Maj(A, F[1], F[2]);
		}

		ctx->h[0] += A;
		ctx->h[1] += F[1];
		ctx->h[2] += F[2];
		ctx->h[3] += F[3];
		ctx->h[4] += E;
		ctx->h[5] += F[5];
		ctx->h[6] += F[6];
		ctx->h[7] += F[7];

		W += SHA_LBLOCK;
	}
}

#elif defined(OPENSSL_SMALL_FOOTPRINT)

/*
 * Compact variant: straightforward rolled loops with a 16-word
 * circular message schedule X[], trading speed for code size.
 */
static void
sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num)
{
	const SHA_LONG64 *W = in;
	SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1, T2;
	SHA_LONG64 X[16];
	int i;

	while (num--) {

		a = ctx->h[0];
		b = ctx->h[1];
		c = ctx->h[2];
		d = ctx->h[3];
		e = ctx->h[4];
		f = ctx->h[5];
		g = ctx->h[6];
		h = ctx->h[7];

		for (i = 0; i < 16; i++) {
#if BYTE_ORDER == BIG_ENDIAN
			T1 = X[i] = W[i];
#else
			T1 = X[i] = PULL64(W[i]);
#endif
			T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
			T2 = Sigma0(a) + Maj(a, b, c);
			h = g;
			g = f;
			f = e;
			e = d + T1;
			d = c;
			c = b;
			b = a;
			a = T1 + T2;
		}

		for (; i < 80; i++) {
			s0 = X[(i + 1)&0x0f];
			s0 = sigma0(s0);
			s1 = X[(i + 14)&0x0f];
			s1 = sigma1(s1);

			T1 = X[i&0xf] += s0 + s1 + X[(i + 9)&0xf];
			T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
			T2 = Sigma0(a) + Maj(a, b, c);
			h = g;
			g = f;
			f = e;
			e = d + T1;
			d = c;
			c = b;
			b = a;
			a = T1 + T2;
		}

		ctx->h[0] += a;
		ctx->h[1] += b;
		ctx->h[2] += c;
		ctx->h[3] += d;
		ctx->h[4] += e;
		ctx->h[5] += f;
		ctx->h[6] += g;
		ctx->h[7] += h;

		W += SHA_LBLOCK;
	}
}

#else

/*
 * Default variant: rounds unrolled 16 at a time.  Instead of shifting
 * the eight working variables, each ROUND_* invocation permutes the
 * variable names (a..h) so the assignments are compiled away.
 */
#define ROUND_00_15(i,a,b,c,d,e,f,g,h) do {	\
	T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];	\
	h = Sigma0(a) + Maj(a,b,c);			\
	d += T1;	h += T1;	} while (0)

#define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X) do {	\
	s0 = X[(j+1)&0x0f];	s0 = sigma0(s0);	\
	s1 = X[(j+14)&0x0f];	s1 = sigma1(s1);	\
	T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];	\
	ROUND_00_15(i+j,a,b,c,d,e,f,g,h);	} while (0)

static void
sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num)
{
	const SHA_LONG64 *W = in;
	SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
	SHA_LONG64 X[16];
	int i;

	while (num--) {

		a = ctx->h[0];
		b = ctx->h[1];
		c = ctx->h[2];
		d = ctx->h[3];
		e = ctx->h[4];
		f = ctx->h[5];
		g = ctx->h[6];
		h = ctx->h[7];

#if BYTE_ORDER == BIG_ENDIAN
		T1 = X[0] = W[0];
		ROUND_00_15(0, a, b, c, d, e, f, g, h);
		T1 = X[1] = W[1];
		ROUND_00_15(1, h, a, b, c, d, e, f, g);
		T1 = X[2] = W[2];
		ROUND_00_15(2, g, h, a, b, c, d, e, f);
		T1 = X[3] = W[3];
		ROUND_00_15(3, f, g, h, a, b, c, d, e);
		T1 = X[4] = W[4];
		ROUND_00_15(4, e, f, g, h, a, b, c, d);
		T1 = X[5] = W[5];
		ROUND_00_15(5, d, e, f, g, h, a, b, c);
		T1 = X[6] = W[6];
		ROUND_00_15(6, c, d, e, f, g, h, a, b);
		T1 = X[7] = W[7];
		ROUND_00_15(7, b, c, d, e, f, g, h, a);
		T1 = X[8] = W[8];
		ROUND_00_15(8, a, b, c, d, e, f, g, h);
		T1 = X[9] = W[9];
		ROUND_00_15(9, h, a, b, c, d, e, f, g);
		T1 = X[10] = W[10];
		ROUND_00_15(10, g, h, a, b, c, d, e, f);
		T1 = X[11] = W[11];
		ROUND_00_15(11, f, g, h, a, b, c, d, e);
		T1 = X[12] = W[12];
		ROUND_00_15(12, e, f, g, h, a, b, c, d);
		T1 = X[13] = W[13];
		ROUND_00_15(13, d, e, f, g, h, a, b, c);
		T1 = X[14] = W[14];
		ROUND_00_15(14, c, d, e, f, g, h, a, b);
		T1 = X[15] = W[15];
		ROUND_00_15(15, b, c, d, e, f, g, h, a);
#else
		T1 = X[0] = PULL64(W[0]);
		ROUND_00_15(0, a, b, c, d, e, f, g, h);
		T1 = X[1] = PULL64(W[1]);
		ROUND_00_15(1, h, a, b, c, d, e, f, g);
		T1 = X[2] = PULL64(W[2]);
		ROUND_00_15(2, g, h, a, b, c, d, e, f);
		T1 = X[3] = PULL64(W[3]);
		ROUND_00_15(3, f, g, h, a, b, c, d, e);
		T1 = X[4] = PULL64(W[4]);
		ROUND_00_15(4, e, f, g, h, a, b, c, d);
		T1 = X[5] = PULL64(W[5]);
		ROUND_00_15(5, d, e, f, g, h, a, b, c);
		T1 = X[6] = PULL64(W[6]);
		ROUND_00_15(6, c, d, e, f, g, h, a, b);
		T1 = X[7] = PULL64(W[7]);
		ROUND_00_15(7, b, c, d, e, f, g, h, a);
		T1 = X[8] = PULL64(W[8]);
		ROUND_00_15(8, a, b, c, d, e, f, g, h);
		T1 = X[9] = PULL64(W[9]);
		ROUND_00_15(9, h, a, b, c, d, e, f, g);
		T1 = X[10] = PULL64(W[10]);
		ROUND_00_15(10, g, h, a, b, c, d, e, f);
		T1 = X[11] = PULL64(W[11]);
		ROUND_00_15(11, f, g, h, a, b, c, d, e);
		T1 = X[12] = PULL64(W[12]);
		ROUND_00_15(12, e, f, g, h, a, b, c, d);
		T1 = X[13] = PULL64(W[13]);
		ROUND_00_15(13, d, e, f, g, h, a, b, c);
		T1 = X[14] = PULL64(W[14]);
		ROUND_00_15(14, c, d, e, f, g, h, a, b);
		T1 = X[15] = PULL64(W[15]);
		ROUND_00_15(15, b, c, d, e, f, g, h, a);
#endif

		for (i = 16; i < 80; i += 16) {
			ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
			ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
			ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
			ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
			ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
			ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
			ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
			ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
			ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
			ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
			ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
			ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
			ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
			ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
			ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
			ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
		}

		ctx->h[0] += a;
		ctx->h[1] += b;
		ctx->h[2] += c;
		ctx->h[3] += d;
		ctx->h[4] += e;
		ctx->h[5] += f;
		ctx->h[6] += g;
		ctx->h[7] += h;

		W += SHA_LBLOCK;
	}
}

#endif

#endif /* SHA512_ASM */

#endif /* !OPENSSL_NO_SHA512 */