/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * The basic framework for this code came from the reference
 * implementation for MD5.  That implementation is Copyright (C)
 * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
 *
 * License to copy and use this software is granted provided that it
 * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
 * Algorithm" in all material mentioning or referencing this software
 * or this function.
 *
 * License is also granted to make and use derivative works provided
 * that such works are identified as "derived from the RSA Data
 * Security, Inc. MD5 Message-Digest Algorithm" in all material
 * mentioning or referencing the derived work.
 *
 * RSA Data Security, Inc. makes no representations concerning either
 * the merchantability of this software or the suitability of this
 * software for any particular purpose. It is provided "as is"
 * without express or implied warranty of any kind.
 *
 * These notices must be retained in any copies of any part of this
 * documentation and/or software.
 *
 * NOTE: Cleaned-up and optimized version of SHA1, based on the FIPS 180-1
 * standard, available at http://www.itl.nist.gov/fipspubs/fip180-1.htm
 * Not as fast as one would like -- further optimizations are encouraged
 * and appreciated.
 */

#include <sys/zfs_context.h>
#include <sha1/sha1.h>
#include <sha1/sha1_consts.h>

#ifdef _LITTLE_ENDIAN
#include <sys/byteorder.h>
#define	HAVE_HTONL
#endif

#define	_RESTRICT_KYWD

static void Encode(uint8_t *, const uint32_t *, size_t);

#if	defined(__sparc)

#define	SHA1_TRANSFORM(ctx, in) \
	SHA1Transform((ctx)->state[0], (ctx)->state[1], (ctx)->state[2], \
		(ctx)->state[3], (ctx)->state[4], (ctx), (in))

static void SHA1Transform(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t,
    SHA1_CTX *, const uint8_t *);

#elif	defined(__amd64)

#define	SHA1_TRANSFORM(ctx, in) sha1_block_data_order((ctx), (in), 1)
#define	SHA1_TRANSFORM_BLOCKS(ctx, in, num) sha1_block_data_order((ctx), \
		(in), (num))

void sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t num_blocks);

#else

#define	SHA1_TRANSFORM(ctx, in) SHA1Transform((ctx), (in))

static void SHA1Transform(SHA1_CTX *, const uint8_t *);

#endif


static uint8_t PADDING[64] = { 0x80, /* all zeros */ };

/*
 * F, G, and H are the basic SHA1 functions.
 */
#define	F(b, c, d)	(((b) & (c)) | ((~b) & (d)))
#define	G(b, c, d)	((b) ^ (c) ^ (d))
#define	H(b, c, d)	(((b) & (c)) | (((b)|(c)) & (d)))
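/*
 * F is the "choose" function (selects c where b is 1, d where b is 0),
 * G is parity, and H is majority; note the identity
 *
 *	H(b, c, d) == ((b) & (c)) | ((b) & (d)) | ((c) & (d))
 *
 * -- the form used above simply saves one AND.
 */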
/*
 * SHA1Init()
 *
 * purpose: initializes the sha1 context and begins an sha1 digest operation
 *   input: SHA1_CTX *	: the context to initialize.
 *  output: void
 */

void
SHA1Init(SHA1_CTX *ctx)
{
	ctx->count[0] = ctx->count[1] = 0;

	/*
	 * load magic initialization constants. Tell lint
	 * that these constants are unsigned by using U.
	 */

	ctx->state[0] = 0x67452301U;
	ctx->state[1] = 0xefcdab89U;
	ctx->state[2] = 0x98badcfeU;
	ctx->state[3] = 0x10325476U;
	ctx->state[4] = 0xc3d2e1f0U;
}

void
SHA1Update(SHA1_CTX *ctx, const void *inptr, size_t input_len)
{
	uint32_t i, buf_index, buf_len;
	const uint8_t *input = inptr;
#if defined(__amd64)
	uint32_t block_count;
#endif	/* __amd64 */

	/* check for noop */
	if (input_len == 0)
		return;

	/* compute number of bytes mod 64 */
	buf_index = (ctx->count[1] >> 3) & 0x3F;

	/* update number of bits */
	if ((ctx->count[1] += (input_len << 3)) < (input_len << 3))
		ctx->count[0]++;

	ctx->count[0] += (input_len >> 29);

	buf_len = 64 - buf_index;

	/* transform as many times as possible */
	i = 0;
	if (input_len >= buf_len) {

		/*
		 * general optimization:
		 *
		 * only do initial bcopy() and SHA1Transform() if
		 * buf_index != 0.  if buf_index == 0, we're just
		 * wasting our time doing the bcopy() since there
		 * wasn't any data left over from a previous call to
		 * SHA1Update().
		 */

		if (buf_index) {
			bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
			SHA1_TRANSFORM(ctx, ctx->buf_un.buf8);
			i = buf_len;
		}

#if !defined(__amd64)
		for (; i + 63 < input_len; i += 64)
			SHA1_TRANSFORM(ctx, &input[i]);
#else
		block_count = (input_len - i) >> 6;
		if (block_count > 0) {
			SHA1_TRANSFORM_BLOCKS(ctx, &input[i], block_count);
			i += block_count << 6;
		}
#endif	/* !__amd64 */

		/*
		 * general optimization:
		 *
		 * if i and input_len are the same, return now instead
		 * of calling bcopy(), since the bcopy() in this case
		 * will be an expensive nop.
		 */

		if (input_len == i)
			return;

		buf_index = 0;
	}

	/* buffer remaining input */
	bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
}

/*
 * SHA1Final()
 *
 * purpose: ends an sha1 digest operation, finalizing the message digest and
 *          zeroing the context.
 *   input: uchar_t *	: a buffer to store the digest.
 *			: The function actually uses void* because many
 *			: callers pass things other than uchar_t here.
 *          SHA1_CTX *	: the context to finalize, save, and zero
 *  output: void
 */

void
SHA1Final(void *digest, SHA1_CTX *ctx)
{
	uint8_t bitcount_be[sizeof (ctx->count)];
	uint32_t index = (ctx->count[1] >> 3) & 0x3f;

	/* store bit count, big endian */
	Encode(bitcount_be, ctx->count, sizeof (bitcount_be));

	/* pad out to 56 mod 64 */
	SHA1Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);

	/* append length (before padding) */
	SHA1Update(ctx, bitcount_be, sizeof (bitcount_be));

	/* store state in digest */
	Encode(digest, ctx->state, sizeof (ctx->state));

	/* zeroize sensitive information */
	bzero(ctx, sizeof (*ctx));
}
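/*
 * Illustrative usage of the three entry points above (a minimal sketch;
 * `data' and `data_len' stand in for the caller's message).  SHA1Update()
 * may be called any number of times before SHA1Final(), which writes the
 * 20-byte (160-bit) digest and zeroes the context:
 *
 *	SHA1_CTX ctx;
 *	uint8_t digest[20];
 *
 *	SHA1Init(&ctx);
 *	SHA1Update(&ctx, data, data_len);
 *	SHA1Final(digest, &ctx);
 */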
#if !defined(__amd64)

typedef uint32_t sha1word;

/*
 * sparc optimization:
 *
 * on the sparc, we can load big endian 32-bit data easily.  note that
 * special care must be taken to ensure the address is 32-bit aligned.
 * in the interest of speed, we don't check to make sure, since
 * careful programming can guarantee this for us.
 */

#if	defined(_ZFS_BIG_ENDIAN)
#define	LOAD_BIG_32(addr)	(*(uint32_t *)(addr))

#elif	defined(HAVE_HTONL)
#define	LOAD_BIG_32(addr)	htonl(*((uint32_t *)(addr)))

#else
#define	LOAD_BIG_32(addr)	BE_32(*((uint32_t *)(addr)))
#endif	/* _ZFS_BIG_ENDIAN */

/*
 * SHA1Transform()
 */
#if	defined(W_ARRAY)
#define	W(n) w[n]
#else	/* !defined(W_ARRAY) */
#define	W(n) w_ ## n
#endif	/* !defined(W_ARRAY) */

/*
 * ROTATE_LEFT rotates x left n bits.
 */

#if	defined(__GNUC__) && defined(_LP64)
static __inline__ uint64_t
ROTATE_LEFT(uint64_t value, uint32_t n)
{
	uint32_t t32;

	t32 = (uint32_t)value;
	return ((t32 << n) | (t32 >> (32 - n)));
}

#else

#define	ROTATE_LEFT(x, n)	\
	(((x) << (n)) | ((x) >> ((sizeof (x) * NBBY)-(n))))

#endif
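/*
 * Either form computes a 32-bit left rotation; for example,
 *
 *	ROTATE_LEFT(0x80000001, 1) == 0x00000003
 *
 * since the high bit wraps around into bit 0.  The GNU/_LP64 variant
 * truncates its argument to 32 bits first, so the two forms agree for
 * the 5- and 30-bit rotates used below.
 */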
#if	defined(__sparc)


/*
 * sparc register window optimization:
 *
 * `a', `b', `c', `d', and `e' are passed into SHA1Transform
 * explicitly since it increases the number of registers available to
 * the compiler.  under this scheme, these variables can be held in
 * %i0 - %i4, which leaves more local and out registers available.
 *
 * purpose: sha1 transformation -- updates the digest based on `blk'
 *   input: uint32_t	: bytes  1 -  4 of the digest
 *          uint32_t	: bytes  5 -  8 of the digest
 *          uint32_t	: bytes  9 - 12 of the digest
 *          uint32_t	: bytes 13 - 16 of the digest
 *          uint32_t	: bytes 17 - 20 of the digest
 *          SHA1_CTX *	: the context to update
 *          uint8_t [64]: the block to use to update the digest
 *  output: void
 */


void
SHA1Transform(uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint32_t e,
    SHA1_CTX *ctx, const uint8_t blk[64])
{
	/*
	 * sparc optimization:
	 *
	 * while it is somewhat counter-intuitive, on sparc, it is
	 * more efficient to place all the constants used in this
	 * function in an array and load the values out of the array
	 * than to manually load the constants.  this is because
	 * setting a register to a 32-bit value takes two ops in most
	 * cases: a `sethi' and an `or', but loading a 32-bit value
	 * from memory only takes one `ld' (or `lduw' on v9).  while
	 * this increases memory usage, the compiler can find enough
	 * other things to do while waiting so that the pipeline does
	 * not stall.  additionally, it is likely that many of these
	 * constants are cached so that later accesses do not even go
	 * out to the bus.
	 *
	 * this array is declared `static' to keep the compiler from
	 * having to bcopy() this array onto the stack frame of
	 * SHA1Transform() each time it is called -- which is
	 * unacceptably expensive.
	 *
	 * the `const' is to ensure that callers are good citizens and
	 * do not try to munge the array.  since these routines are
	 * going to be called from inside multithreaded kernelland,
	 * this is a good safety check. -- `sha1_consts' will end up in
	 * .rodata.
	 *
	 * unfortunately, loading from an array in this manner hurts
	 * performance under Intel.  So, there is a macro,
	 * SHA1_CONST(), used in SHA1Transform(), that either expands to
	 * a reference to this array, or to the actual constant,
	 * depending on what platform this code is compiled for.
	 */


	static const uint32_t sha1_consts[] = {
		SHA1_CONST_0, SHA1_CONST_1, SHA1_CONST_2, SHA1_CONST_3
	};


	/*
	 * general optimization:
	 *
	 * use individual integers instead of using an array.  this is a
	 * win, although the amount it wins by seems to vary quite a bit.
	 */


	uint32_t w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7;
	uint32_t w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;


	/*
	 * sparc optimization:
	 *
	 * if `blk' is already aligned on a 4-byte boundary, use
	 * LOAD_BIG_32() directly.  otherwise, bcopy() into a
	 * buffer that *is* aligned on a 4-byte boundary and then do
	 * the LOAD_BIG_32() on that buffer.  benchmarks have shown
	 * that using the bcopy() is better than loading the bytes
	 * individually and doing the endian-swap by hand.
	 *
	 * even though it's quite tempting to do:
	 *
	 *	blk = bcopy(ctx->buf_un.buf32, blk, sizeof (ctx->buf_un.buf32));
	 *
	 * and only have one set of LOAD_BIG_32()'s, the compiler
	 * *does not* like that, so please resist the urge.
	 */


	if ((uintptr_t)blk & 0x3) {		/* not 4-byte aligned? */
		bcopy(blk, ctx->buf_un.buf32, sizeof (ctx->buf_un.buf32));
		w_15 = LOAD_BIG_32(ctx->buf_un.buf32 + 15);
		w_14 = LOAD_BIG_32(ctx->buf_un.buf32 + 14);
		w_13 = LOAD_BIG_32(ctx->buf_un.buf32 + 13);
		w_12 = LOAD_BIG_32(ctx->buf_un.buf32 + 12);
		w_11 = LOAD_BIG_32(ctx->buf_un.buf32 + 11);
		w_10 = LOAD_BIG_32(ctx->buf_un.buf32 + 10);
		w_9 = LOAD_BIG_32(ctx->buf_un.buf32 + 9);
		w_8 = LOAD_BIG_32(ctx->buf_un.buf32 + 8);
		w_7 = LOAD_BIG_32(ctx->buf_un.buf32 + 7);
		w_6 = LOAD_BIG_32(ctx->buf_un.buf32 + 6);
		w_5 = LOAD_BIG_32(ctx->buf_un.buf32 + 5);
		w_4 = LOAD_BIG_32(ctx->buf_un.buf32 + 4);
		w_3 = LOAD_BIG_32(ctx->buf_un.buf32 + 3);
		w_2 = LOAD_BIG_32(ctx->buf_un.buf32 + 2);
		w_1 = LOAD_BIG_32(ctx->buf_un.buf32 + 1);
		w_0 = LOAD_BIG_32(ctx->buf_un.buf32 + 0);
	} else {
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_15 = LOAD_BIG_32(blk + 60);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_14 = LOAD_BIG_32(blk + 56);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_13 = LOAD_BIG_32(blk + 52);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_12 = LOAD_BIG_32(blk + 48);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_11 = LOAD_BIG_32(blk + 44);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_10 = LOAD_BIG_32(blk + 40);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_9 = LOAD_BIG_32(blk + 36);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_8 = LOAD_BIG_32(blk + 32);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_7 = LOAD_BIG_32(blk + 28);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_6 = LOAD_BIG_32(blk + 24);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_5 = LOAD_BIG_32(blk + 20);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_4 = LOAD_BIG_32(blk + 16);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_3 = LOAD_BIG_32(blk + 12);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_2 = LOAD_BIG_32(blk + 8);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_1 = LOAD_BIG_32(blk + 4);
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		w_0 = LOAD_BIG_32(blk + 0);
	}
#else	/* !defined(__sparc) */

void /* CSTYLED */
SHA1Transform(SHA1_CTX *ctx, const uint8_t blk[64])
{
	/* CSTYLED */
	sha1word a = ctx->state[0];
	sha1word b = ctx->state[1];
	sha1word c = ctx->state[2];
	sha1word d = ctx->state[3];
	sha1word e = ctx->state[4];
#if	defined(W_ARRAY)
	sha1word w[16];
#else	/* !defined(W_ARRAY) */
	sha1word w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7;
	sha1word w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
#endif	/* !defined(W_ARRAY) */

	W(0) = LOAD_BIG_32((void *)(blk + 0));
	W(1) = LOAD_BIG_32((void *)(blk + 4));
	W(2) = LOAD_BIG_32((void *)(blk + 8));
	W(3) = LOAD_BIG_32((void *)(blk + 12));
	W(4) = LOAD_BIG_32((void *)(blk + 16));
	W(5) = LOAD_BIG_32((void *)(blk + 20));
	W(6) = LOAD_BIG_32((void *)(blk + 24));
	W(7) = LOAD_BIG_32((void *)(blk + 28));
	W(8) = LOAD_BIG_32((void *)(blk + 32));
	W(9) = LOAD_BIG_32((void *)(blk + 36));
	W(10) = LOAD_BIG_32((void *)(blk + 40));
	W(11) = LOAD_BIG_32((void *)(blk + 44));
	W(12) = LOAD_BIG_32((void *)(blk + 48));
	W(13) = LOAD_BIG_32((void *)(blk + 52));
	W(14) = LOAD_BIG_32((void *)(blk + 56));
	W(15) = LOAD_BIG_32((void *)(blk + 60));

#endif	/* !defined(__sparc) */

	/*
	 * general optimization:
	 *
	 * even though this approach is described in the standard as
	 * being slower algorithmically, it is 30-40% faster than the
	 * "faster" version under SPARC, because this version has more
	 * of the constraints specified at compile-time and uses fewer
	 * variables (and therefore has better register utilization)
	 * than its "speedier" brother.  (i've tried both, trust me)
	 *
	 * for either method given in the spec, there is an "assignment"
	 * phase where the following takes place:
	 *
	 *	tmp = (main_computation);
	 *	e = d; d = c; c = rotate_left(b, 30); b = a; a = tmp;
	 *
	 * we can make the algorithm go faster by not doing this work,
	 * but just pretending that `d' is now `e', etc.  this works
	 * really well and obviates the need for a temporary variable.
	 * however, we still explicitly perform the rotate action,
	 * since it is cheaper on SPARC to do it once than to have to
	 * do it over and over again.
	 */
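	/*
	 * concretely, a sketch of the renaming as it plays out in round 0
	 * below: instead of
	 *
	 *	tmp = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(0) + SHA1_CONST(0);
	 *	e = d; d = c; c = ROTATE_LEFT(b, 30); b = a; a = tmp;
	 *
	 * the code accumulates into `e' directly and rotates `b' in place:
	 *
	 *	e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(0) + SHA1_CONST(0);
	 *	b = ROTATE_LEFT(b, 30);
	 *
	 * the next round then reads the variables in their shifted roles
	 * (its `a' is this round's `e', and so on), so the five assignments
	 * per round collapse to a single rotate.
	 */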
	/* round 1 */
	e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(0) + SHA1_CONST(0); /* 0 */
	b = ROTATE_LEFT(b, 30);

	d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(1) + SHA1_CONST(0); /* 1 */
	a = ROTATE_LEFT(a, 30);

	c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(2) + SHA1_CONST(0); /* 2 */
	e = ROTATE_LEFT(e, 30);

	b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(3) + SHA1_CONST(0); /* 3 */
	d = ROTATE_LEFT(d, 30);

	a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(4) + SHA1_CONST(0); /* 4 */
	c = ROTATE_LEFT(c, 30);

	e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(5) + SHA1_CONST(0); /* 5 */
	b = ROTATE_LEFT(b, 30);

	d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(6) + SHA1_CONST(0); /* 6 */
	a = ROTATE_LEFT(a, 30);

	c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(7) + SHA1_CONST(0); /* 7 */
	e = ROTATE_LEFT(e, 30);

	b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(8) + SHA1_CONST(0); /* 8 */
	d = ROTATE_LEFT(d, 30);

	a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(9) + SHA1_CONST(0); /* 9 */
	c = ROTATE_LEFT(c, 30);

	e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(10) + SHA1_CONST(0); /* 10 */
	b = ROTATE_LEFT(b, 30);

	d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(11) + SHA1_CONST(0); /* 11 */
	a = ROTATE_LEFT(a, 30);

	c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(12) + SHA1_CONST(0); /* 12 */
	e = ROTATE_LEFT(e, 30);

	b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(13) + SHA1_CONST(0); /* 13 */
	d = ROTATE_LEFT(d, 30);

	a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(14) + SHA1_CONST(0); /* 14 */
	c = ROTATE_LEFT(c, 30);

	e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(15) + SHA1_CONST(0); /* 15 */
	b = ROTATE_LEFT(b, 30);

	W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1);	/* 16 */
	d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(0) + SHA1_CONST(0);
	a = ROTATE_LEFT(a, 30);

	W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1);	/* 17 */
	c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(1) + SHA1_CONST(0);
	e = ROTATE_LEFT(e, 30);

	W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1);	/* 18 */
	b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(2) + SHA1_CONST(0);
	d = ROTATE_LEFT(d, 30);

	W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1);	/* 19 */
	a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(3) + SHA1_CONST(0);
	c = ROTATE_LEFT(c, 30);

	/* round 2 */
	W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1);	/* 20 */
	e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(4) + SHA1_CONST(1);
	b = ROTATE_LEFT(b, 30);

	W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1);	/* 21 */
	d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(5) + SHA1_CONST(1);
	a = ROTATE_LEFT(a, 30);

	W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1);	/* 22 */
	c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(6) + SHA1_CONST(1);
	e = ROTATE_LEFT(e, 30);

	W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1);	/* 23 */
	b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(7) + SHA1_CONST(1);
	d = ROTATE_LEFT(d, 30);

	W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1);	/* 24 */
	a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(8) + SHA1_CONST(1);
	c = ROTATE_LEFT(c, 30);

	W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1);	/* 25 */
	e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(9) + SHA1_CONST(1);
	b = ROTATE_LEFT(b, 30);

	W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1);	/* 26 */
	d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(10) + SHA1_CONST(1);
	a = ROTATE_LEFT(a, 30);
	W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1);	/* 27 */
	c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(11) + SHA1_CONST(1);
	e = ROTATE_LEFT(e, 30);

	W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1);	/* 28 */
	b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(12) + SHA1_CONST(1);
	d = ROTATE_LEFT(d, 30);

	W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1);	/* 29 */
	a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(13) + SHA1_CONST(1);
	c = ROTATE_LEFT(c, 30);

	W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1);	/* 30 */
	e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(14) + SHA1_CONST(1);
	b = ROTATE_LEFT(b, 30);

	W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1);	/* 31 */
	d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(15) + SHA1_CONST(1);
	a = ROTATE_LEFT(a, 30);

	W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1);	/* 32 */
	c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(0) + SHA1_CONST(1);
	e = ROTATE_LEFT(e, 30);

	W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1);	/* 33 */
	b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(1) + SHA1_CONST(1);
	d = ROTATE_LEFT(d, 30);

	W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1);	/* 34 */
	a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(2) + SHA1_CONST(1);
	c = ROTATE_LEFT(c, 30);

	W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1);	/* 35 */
	e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(3) + SHA1_CONST(1);
	b = ROTATE_LEFT(b, 30);

	W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1);	/* 36 */
	d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(4) + SHA1_CONST(1);
	a = ROTATE_LEFT(a, 30);

	W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1);	/* 37 */
	c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(5) + SHA1_CONST(1);
	e = ROTATE_LEFT(e, 30);

	W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1);	/* 38 */
	b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(6) + SHA1_CONST(1);
	d = ROTATE_LEFT(d, 30);

	W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1);	/* 39 */
	a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(7) + SHA1_CONST(1);
	c = ROTATE_LEFT(c, 30);

	/* round 3 */
	W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1);	/* 40 */
	e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(8) + SHA1_CONST(2);
	b = ROTATE_LEFT(b, 30);

	W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1);	/* 41 */
	d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(9) + SHA1_CONST(2);
	a = ROTATE_LEFT(a, 30);

	W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1);	/* 42 */
	c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(10) + SHA1_CONST(2);
	e = ROTATE_LEFT(e, 30);

	W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1);	/* 43 */
	b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(11) + SHA1_CONST(2);
	d = ROTATE_LEFT(d, 30);

	W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1);	/* 44 */
	a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(12) + SHA1_CONST(2);
	c = ROTATE_LEFT(c, 30);

	W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1);	/* 45 */
	e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(13) + SHA1_CONST(2);
	b = ROTATE_LEFT(b, 30);

	W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1);	/* 46 */
	d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(14) + SHA1_CONST(2);
	a = ROTATE_LEFT(a, 30);

	W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1);	/* 47 */
	c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(15) + SHA1_CONST(2);
	e = ROTATE_LEFT(e, 30);
	W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1);	/* 48 */
	b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(0) + SHA1_CONST(2);
	d = ROTATE_LEFT(d, 30);

	W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1);	/* 49 */
	a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(1) + SHA1_CONST(2);
	c = ROTATE_LEFT(c, 30);

	W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1);	/* 50 */
	e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(2) + SHA1_CONST(2);
	b = ROTATE_LEFT(b, 30);

	W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1);	/* 51 */
	d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(3) + SHA1_CONST(2);
	a = ROTATE_LEFT(a, 30);

	W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1);	/* 52 */
	c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(4) + SHA1_CONST(2);
	e = ROTATE_LEFT(e, 30);

	W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1);	/* 53 */
	b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(5) + SHA1_CONST(2);
	d = ROTATE_LEFT(d, 30);

	W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1);	/* 54 */
	a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(6) + SHA1_CONST(2);
	c = ROTATE_LEFT(c, 30);

	W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1);	/* 55 */
	e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(7) + SHA1_CONST(2);
	b = ROTATE_LEFT(b, 30);

	W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1);	/* 56 */
	d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(8) + SHA1_CONST(2);
	a = ROTATE_LEFT(a, 30);

	W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1);	/* 57 */
	c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(9) + SHA1_CONST(2);
	e = ROTATE_LEFT(e, 30);

	W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1);	/* 58 */
	b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(10) + SHA1_CONST(2);
	d = ROTATE_LEFT(d, 30);

	W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1);	/* 59 */
	a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(11) + SHA1_CONST(2);
	c = ROTATE_LEFT(c, 30);

	/* round 4 */
	W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1);	/* 60 */
	e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(12) + SHA1_CONST(3);
	b = ROTATE_LEFT(b, 30);

	W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1);	/* 61 */
	d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(13) + SHA1_CONST(3);
	a = ROTATE_LEFT(a, 30);

	W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1);	/* 62 */
	c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(14) + SHA1_CONST(3);
	e = ROTATE_LEFT(e, 30);

	W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1);	/* 63 */
	b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(15) + SHA1_CONST(3);
	d = ROTATE_LEFT(d, 30);

	W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1);	/* 64 */
	a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(0) + SHA1_CONST(3);
	c = ROTATE_LEFT(c, 30);

	W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1);	/* 65 */
	e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(1) + SHA1_CONST(3);
	b = ROTATE_LEFT(b, 30);

	W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1);	/* 66 */
	d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(2) + SHA1_CONST(3);
	a = ROTATE_LEFT(a, 30);

	W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1);	/* 67 */
	c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(3) + SHA1_CONST(3);
	e = ROTATE_LEFT(e, 30);

	W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1);	/* 68 */
	b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(4) + SHA1_CONST(3);
	d = ROTATE_LEFT(d, 30);
	W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1);	/* 69 */
	a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(5) + SHA1_CONST(3);
	c = ROTATE_LEFT(c, 30);

	W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1);	/* 70 */
	e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(6) + SHA1_CONST(3);
	b = ROTATE_LEFT(b, 30);

	W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1);	/* 71 */
	d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(7) + SHA1_CONST(3);
	a = ROTATE_LEFT(a, 30);

	W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1);	/* 72 */
	c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(8) + SHA1_CONST(3);
	e = ROTATE_LEFT(e, 30);

	W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1);	/* 73 */
	b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(9) + SHA1_CONST(3);
	d = ROTATE_LEFT(d, 30);

	W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1);	/* 74 */
	a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(10) + SHA1_CONST(3);
	c = ROTATE_LEFT(c, 30);

	W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1);	/* 75 */
	e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(11) + SHA1_CONST(3);
	b = ROTATE_LEFT(b, 30);

	W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1);	/* 76 */
	d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(12) + SHA1_CONST(3);
	a = ROTATE_LEFT(a, 30);

	W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1);	/* 77 */
	c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(13) + SHA1_CONST(3);
	e = ROTATE_LEFT(e, 30);

	W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1);	/* 78 */
	b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(14) + SHA1_CONST(3);
	d = ROTATE_LEFT(d, 30);

	W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1);	/* 79 */

	ctx->state[0] += ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(15) +
	    SHA1_CONST(3);
	ctx->state[1] += b;
	ctx->state[2] += ROTATE_LEFT(c, 30);
	ctx->state[3] += d;
	ctx->state[4] += e;

	/* zeroize sensitive information */
	W(0) = W(1) = W(2) = W(3) = W(4) = W(5) = W(6) = W(7) = W(8) = 0;
	W(9) = W(10) = W(11) = W(12) = W(13) = W(14) = W(15) = 0;
}
#endif	/* !__amd64 */


/*
 * Encode()
 *
 * purpose: to convert a list of numbers from host byte order to big endian
 *   input: uint8_t *	: place to store the converted big endian numbers
 *	    uint32_t *	: place to get numbers to convert from
 *          size_t	: the length of the input in bytes
 *  output: void
 */

static void
Encode(uint8_t *_RESTRICT_KYWD output, const uint32_t *_RESTRICT_KYWD input,
    size_t len)
{
	size_t		i, j;

#if	defined(__sparc)
	if (IS_P2ALIGNED(output, sizeof (uint32_t))) {
		for (i = 0, j = 0; j < len; i++, j += 4) {
			/* LINTED E_BAD_PTR_CAST_ALIGN */
			*((uint32_t *)(output + j)) = input[i];
		}
	} else {
#endif	/* little endian -- will work on big endian, but slowly */

		for (i = 0, j = 0; j < len; i++, j += 4) {
			output[j]	= (input[i] >> 24) & 0xff;
			output[j + 1]	= (input[i] >> 16) & 0xff;
			output[j + 2]	= (input[i] >> 8) & 0xff;
			output[j + 3]	= input[i] & 0xff;
		}
#if	defined(__sparc)
	}
#endif
}
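/*
 * A worked example of Encode(): with input[0] == 0x67452301, the loop
 * above stores
 *
 *	output[0..3] == { 0x67, 0x45, 0x23, 0x01 }
 *
 * regardless of host byte order; this is how SHA1Final() serializes
 * both the bit count and the final state words.
 */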