1/* vi: set sw=4 ts=4: */ 2/* 3 * Based on shasum from http://www.netsw.org/crypto/hash/ 4 * Majorly hacked up to use Dr Brian Gladman's sha1 code 5 * 6 * Copyright (C) 2002 Dr Brian Gladman <brg@gladman.me.uk>, Worcester, UK. 7 * Copyright (C) 2003 Glenn L. McGrath 8 * Copyright (C) 2003 Erik Andersen 9 * 10 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. 11 * 12 * --------------------------------------------------------------------------- 13 * Issue Date: 10/11/2002 14 * 15 * This is a byte oriented version of SHA1 that operates on arrays of bytes 16 * stored in memory. It runs at 22 cycles per byte on a Pentium P4 processor 17 * 18 * --------------------------------------------------------------------------- 19 * 20 * SHA256 and SHA512 parts are: 21 * Released into the Public Domain by Ulrich Drepper <drepper@redhat.com>. 22 * Shrank by Denys Vlasenko. 23 * 24 * --------------------------------------------------------------------------- 25 * 26 * The best way to test random blocksizes is to go to coreutils/md5_sha1_sum.c 27 * and replace "4096" with something like "2000 + time(NULL) % 2097", 28 * then rebuild and compare "shaNNNsum bigfile" results. 29 */ 30 31#include "libbb.h" 32 33#define rotl32(x,n) (((x) << (n)) | ((x) >> (32 - (n)))) 34#define rotr32(x,n) (((x) >> (n)) | ((x) << (32 - (n)))) 35/* for sha512: */ 36#define rotr64(x,n) (((x) >> (n)) | ((x) << (64 - (n)))) 37#if BB_LITTLE_ENDIAN 38static inline uint64_t hton64(uint64_t v) 39{ 40 return (((uint64_t)htonl(v)) << 32) | htonl(v >> 32); 41} 42#else 43#define hton64(v) (v) 44#endif 45#define ntoh64(v) hton64(v) 46 47/* To check alignment gcc has an appropriate operator. Other 48 compilers don't. */ 49#if defined(__GNUC__) && __GNUC__ >= 2 50# define UNALIGNED_P(p,type) (((uintptr_t) p) % __alignof__(type) != 0) 51#else 52# define UNALIGNED_P(p,type) (((uintptr_t) p) % sizeof(type) != 0) 53#endif 54 55 56/* Some arch headers have conflicting defines */ 57#undef ch 58#undef parity 59#undef maj 60#undef rnd 61 62static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx) 63{ 64 unsigned t; 65 uint32_t W[80], a, b, c, d, e; 66 const uint32_t *words = (uint32_t*) ctx->wbuffer; 67 68 for (t = 0; t < 16; ++t) { 69 W[t] = ntohl(*words); 70 words++; 71 } 72 73 for (/*t = 16*/; t < 80; ++t) { 74 uint32_t T = W[t - 3] ^ W[t - 8] ^ W[t - 14] ^ W[t - 16]; 75 W[t] = rotl32(T, 1); 76 } 77 78 a = ctx->hash[0]; 79 b = ctx->hash[1]; 80 c = ctx->hash[2]; 81 d = ctx->hash[3]; 82 e = ctx->hash[4]; 83 84/* Reverse byte order in 32-bit words */ 85#define ch(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) 86#define parity(x,y,z) ((x) ^ (y) ^ (z)) 87#define maj(x,y,z) (((x) & (y)) | ((z) & ((x) | (y)))) 88/* A normal version as set out in the FIPS. This version uses */ 89/* partial loop unrolling and is optimised for the Pentium 4 */ 90#define rnd(f,k) \ 91 do { \ 92 uint32_t T = a; \ 93 a = rotl32(a, 5) + f(b, c, d) + e + k + W[t]; \ 94 e = d; \ 95 d = c; \ 96 c = rotl32(b, 30); \ 97 b = T; \ 98 } while (0) 99 100 for (t = 0; t < 20; ++t) 101 rnd(ch, 0x5a827999); 102 103 for (/*t = 20*/; t < 40; ++t) 104 rnd(parity, 0x6ed9eba1); 105 106 for (/*t = 40*/; t < 60; ++t) 107 rnd(maj, 0x8f1bbcdc); 108 109 for (/*t = 60*/; t < 80; ++t) 110 rnd(parity, 0xca62c1d6); 111#undef ch 112#undef parity 113#undef maj 114#undef rnd 115 116 ctx->hash[0] += a; 117 ctx->hash[1] += b; 118 ctx->hash[2] += c; 119 ctx->hash[3] += d; 120 ctx->hash[4] += e; 121} 122 123/* Constants for SHA512 from FIPS 180-2:4.2.3. 124 * SHA256 constants from FIPS 180-2:4.2.2 125 * are the most significant half of first 64 elements 126 * of the same array. 127 */ 128static const uint64_t sha_K[80] = { 129 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, 130 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL, 131 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL, 132 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL, 133 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL, 134 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL, 135 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, 136 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL, 137 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL, 138 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL, 139 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL, 140 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL, 141 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL, 142 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL, 143 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL, 144 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL, 145 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL, 146 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL, 147 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, 148 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL, 149 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL, 150 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL, 151 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL, 152 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL, 153 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, 154 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL, 155 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL, 156 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL, 157 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL, 158 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL, 159 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, 160 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL, 161 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, /* [64]+ are used for sha512 only */ 162 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL, 163 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL, 164 0x113f9804bef90daeULL, 0x1b710b35131c471bULL, 165 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, 166 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL, 167 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, 168 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL 169}; 170 171#undef Ch 172#undef Maj 173#undef S0 174#undef S1 175#undef R0 176#undef R1 177 178static void FAST_FUNC sha256_process_block64(sha256_ctx_t *ctx) 179{ 180 unsigned t; 181 uint32_t W[64], a, b, c, d, e, f, g, h; 182 const uint32_t *words = (uint32_t*) ctx->wbuffer; 183 184 /* Operators defined in FIPS 180-2:4.1.2. */ 185#define Ch(x, y, z) ((x & y) ^ (~x & z)) 186#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z)) 187#define S0(x) (rotr32(x, 2) ^ rotr32(x, 13) ^ rotr32(x, 22)) 188#define S1(x) (rotr32(x, 6) ^ rotr32(x, 11) ^ rotr32(x, 25)) 189#define R0(x) (rotr32(x, 7) ^ rotr32(x, 18) ^ (x >> 3)) 190#define R1(x) (rotr32(x, 17) ^ rotr32(x, 19) ^ (x >> 10)) 191 192 /* Compute the message schedule according to FIPS 180-2:6.2.2 step 2. */ 193 for (t = 0; t < 16; ++t) { 194 W[t] = ntohl(*words); 195 words++; 196 } 197 198 for (/*t = 16*/; t < 64; ++t) 199 W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16]; 200 201 a = ctx->hash[0]; 202 b = ctx->hash[1]; 203 c = ctx->hash[2]; 204 d = ctx->hash[3]; 205 e = ctx->hash[4]; 206 f = ctx->hash[5]; 207 g = ctx->hash[6]; 208 h = ctx->hash[7]; 209 210 /* The actual computation according to FIPS 180-2:6.2.2 step 3. */ 211 for (t = 0; t < 64; ++t) { 212 /* Need to fetch upper half of sha_K[t] 213 * (I hope compiler is clever enough to just fetch 214 * upper half) 215 */ 216 uint32_t K_t = sha_K[t] >> 32; 217 uint32_t T1 = h + S1(e) + Ch(e, f, g) + K_t + W[t]; 218 uint32_t T2 = S0(a) + Maj(a, b, c); 219 h = g; 220 g = f; 221 f = e; 222 e = d + T1; 223 d = c; 224 c = b; 225 b = a; 226 a = T1 + T2; 227 } 228#undef Ch 229#undef Maj 230#undef S0 231#undef S1 232#undef R0 233#undef R1 234 /* Add the starting values of the context according to FIPS 180-2:6.2.2 235 step 4. */ 236 ctx->hash[0] += a; 237 ctx->hash[1] += b; 238 ctx->hash[2] += c; 239 ctx->hash[3] += d; 240 ctx->hash[4] += e; 241 ctx->hash[5] += f; 242 ctx->hash[6] += g; 243 ctx->hash[7] += h; 244} 245 246static void FAST_FUNC sha512_process_block128(sha512_ctx_t *ctx) 247{ 248 unsigned t; 249 uint64_t W[80]; 250 /* On i386, having assignments here (not later as sha256 does) 251 * produces 99 bytes smaller code with gcc 4.3.1 252 */ 253 uint64_t a = ctx->hash[0]; 254 uint64_t b = ctx->hash[1]; 255 uint64_t c = ctx->hash[2]; 256 uint64_t d = ctx->hash[3]; 257 uint64_t e = ctx->hash[4]; 258 uint64_t f = ctx->hash[5]; 259 uint64_t g = ctx->hash[6]; 260 uint64_t h = ctx->hash[7]; 261 const uint64_t *words = (uint64_t*) ctx->wbuffer; 262 263 /* Operators defined in FIPS 180-2:4.1.2. */ 264#define Ch(x, y, z) ((x & y) ^ (~x & z)) 265#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z)) 266#define S0(x) (rotr64(x, 28) ^ rotr64(x, 34) ^ rotr64(x, 39)) 267#define S1(x) (rotr64(x, 14) ^ rotr64(x, 18) ^ rotr64(x, 41)) 268#define R0(x) (rotr64(x, 1) ^ rotr64(x, 8) ^ (x >> 7)) 269#define R1(x) (rotr64(x, 19) ^ rotr64(x, 61) ^ (x >> 6)) 270 271 /* Compute the message schedule according to FIPS 180-2:6.3.2 step 2. */ 272 for (t = 0; t < 16; ++t) { 273 W[t] = ntoh64(*words); 274 words++; 275 } 276 for (/*t = 16*/; t < 80; ++t) 277 W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16]; 278 279 /* The actual computation according to FIPS 180-2:6.3.2 step 3. */ 280 for (t = 0; t < 80; ++t) { 281 uint64_t T1 = h + S1(e) + Ch(e, f, g) + sha_K[t] + W[t]; 282 uint64_t T2 = S0(a) + Maj(a, b, c); 283 h = g; 284 g = f; 285 f = e; 286 e = d + T1; 287 d = c; 288 c = b; 289 b = a; 290 a = T1 + T2; 291 } 292#undef Ch 293#undef Maj 294#undef S0 295#undef S1 296#undef R0 297#undef R1 298 /* Add the starting values of the context according to FIPS 180-2:6.3.2 299 step 4. */ 300 ctx->hash[0] += a; 301 ctx->hash[1] += b; 302 ctx->hash[2] += c; 303 ctx->hash[3] += d; 304 ctx->hash[4] += e; 305 ctx->hash[5] += f; 306 ctx->hash[6] += g; 307 ctx->hash[7] += h; 308} 309 310 311void FAST_FUNC sha1_begin(sha1_ctx_t *ctx) 312{ 313 ctx->hash[0] = 0x67452301; 314 ctx->hash[1] = 0xefcdab89; 315 ctx->hash[2] = 0x98badcfe; 316 ctx->hash[3] = 0x10325476; 317 ctx->hash[4] = 0xc3d2e1f0; 318 ctx->total64 = 0; 319 ctx->process_block = sha1_process_block64; 320} 321 322static const uint32_t init256[] = { 323 0x6a09e667, 324 0xbb67ae85, 325 0x3c6ef372, 326 0xa54ff53a, 327 0x510e527f, 328 0x9b05688c, 329 0x1f83d9ab, 330 0x5be0cd19 331}; 332static const uint32_t init512_lo[] = { 333 0xf3bcc908, 334 0x84caa73b, 335 0xfe94f82b, 336 0x5f1d36f1, 337 0xade682d1, 338 0x2b3e6c1f, 339 0xfb41bd6b, 340 0x137e2179 341}; 342 343/* Initialize structure containing state of computation. 344 (FIPS 180-2:5.3.2) */ 345void FAST_FUNC sha256_begin(sha256_ctx_t *ctx) 346{ 347 memcpy(ctx->hash, init256, sizeof(init256)); 348 ctx->total64 = 0; 349 ctx->process_block = sha256_process_block64; 350} 351 352/* Initialize structure containing state of computation. 353 (FIPS 180-2:5.3.3) */ 354void FAST_FUNC sha512_begin(sha512_ctx_t *ctx) 355{ 356 int i; 357 for (i = 0; i < 8; i++) 358 ctx->hash[i] = ((uint64_t)(init256[i]) << 32) + init512_lo[i]; 359 ctx->total64[0] = ctx->total64[1] = 0; 360} 361 362 363/* Used also for sha256 */ 364void FAST_FUNC sha1_hash(const void *buffer, size_t len, sha1_ctx_t *ctx) 365{ 366 unsigned in_buf = ctx->total64 & 63; 367 unsigned add = 64 - in_buf; 368 369 ctx->total64 += len; 370 371 while (len >= add) { /* transfer whole blocks while possible */ 372 memcpy(ctx->wbuffer + in_buf, buffer, add); 373 buffer = (const char *)buffer + add; 374 len -= add; 375 add = 64; 376 in_buf = 0; 377 ctx->process_block(ctx); 378 } 379 380 memcpy(ctx->wbuffer + in_buf, buffer, len); 381} 382 383void FAST_FUNC sha512_hash(const void *buffer, size_t len, sha512_ctx_t *ctx) 384{ 385 unsigned in_buf = ctx->total64[0] & 127; 386 unsigned add = 128 - in_buf; 387 388 /* First increment the byte count. FIPS 180-2 specifies the possible 389 length of the file up to 2^128 _bits_. 390 We compute the number of _bytes_ and convert to bits later. */ 391 ctx->total64[0] += len; 392 if (ctx->total64[0] < len) 393 ctx->total64[1]++; 394 395 while (len >= add) { /* transfer whole blocks while possible */ 396 memcpy(ctx->wbuffer + in_buf, buffer, add); 397 buffer = (const char *)buffer + add; 398 len -= add; 399 add = 128; 400 in_buf = 0; 401 sha512_process_block128(ctx); 402 } 403 404 memcpy(ctx->wbuffer + in_buf, buffer, len); 405} 406 407 408/* Used also for sha256 */ 409void FAST_FUNC sha1_end(void *resbuf, sha1_ctx_t *ctx) 410{ 411 unsigned pad, in_buf; 412 413 in_buf = ctx->total64 & 63; 414 /* Pad the buffer to the next 64-byte boundary with 0x80,0,0,0... */ 415 ctx->wbuffer[in_buf++] = 0x80; 416 417 /* This loop iterates either once or twice, no more, no less */ 418 while (1) { 419 pad = 64 - in_buf; 420 memset(ctx->wbuffer + in_buf, 0, pad); 421 in_buf = 0; 422 /* Do we have enough space for the length count? */ 423 if (pad >= 8) { 424 /* Store the 64-bit counter of bits in the buffer in BE format */ 425 uint64_t t = ctx->total64 << 3; 426 t = hton64(t); 427 /* wbuffer is suitably aligned for this */ 428 *(uint64_t *) (&ctx->wbuffer[64 - 8]) = t; 429 } 430 ctx->process_block(ctx); 431 if (pad >= 8) 432 break; 433 } 434 435 in_buf = (ctx->process_block == sha1_process_block64) ? 5 : 8; 436 /* This way we do not impose alignment constraints on resbuf: */ 437 if (BB_LITTLE_ENDIAN) { 438 unsigned i; 439 for (i = 0; i < in_buf; ++i) 440 ctx->hash[i] = htonl(ctx->hash[i]); 441 } 442 memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * in_buf); 443} 444 445void FAST_FUNC sha512_end(void *resbuf, sha512_ctx_t *ctx) 446{ 447 unsigned pad, in_buf; 448 449 in_buf = ctx->total64[0] & 127; 450 /* Pad the buffer to the next 128-byte boundary with 0x80,0,0,0... 451 * (FIPS 180-2:5.1.2) 452 */ 453 ctx->wbuffer[in_buf++] = 0x80; 454 455 while (1) { 456 pad = 128 - in_buf; 457 memset(ctx->wbuffer + in_buf, 0, pad); 458 in_buf = 0; 459 if (pad >= 16) { 460 /* Store the 128-bit counter of bits in the buffer in BE format */ 461 uint64_t t; 462 t = ctx->total64[0] << 3; 463 t = hton64(t); 464 *(uint64_t *) (&ctx->wbuffer[128 - 8]) = t; 465 t = (ctx->total64[1] << 3) | (ctx->total64[0] >> 61); 466 t = hton64(t); 467 *(uint64_t *) (&ctx->wbuffer[128 - 16]) = t; 468 } 469 sha512_process_block128(ctx); 470 if (pad >= 16) 471 break; 472 } 473 474 if (BB_LITTLE_ENDIAN) { 475 unsigned i; 476 for (i = 0; i < ARRAY_SIZE(ctx->hash); ++i) 477 ctx->hash[i] = hton64(ctx->hash[i]); 478 } 479 memcpy(resbuf, ctx->hash, sizeof(ctx->hash)); 480} 481