1/* vi: set sw=4 ts=4: */
2/*
3 * Based on shasum from http://www.netsw.org/crypto/hash/
4 * Majorly hacked up to use Dr Brian Gladman's sha1 code
5 *
6 * Copyright (C) 2002 Dr Brian Gladman <brg@gladman.me.uk>, Worcester, UK.
7 * Copyright (C) 2003 Glenn L. McGrath
8 * Copyright (C) 2003 Erik Andersen
9 *
10 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
11 *
12 * ---------------------------------------------------------------------------
13 * Issue Date: 10/11/2002
14 *
15 * This is a byte oriented version of SHA1 that operates on arrays of bytes
16 * stored in memory. It runs at 22 cycles per byte on a Pentium P4 processor
17 *
18 * ---------------------------------------------------------------------------
19 *
20 * SHA256 and SHA512 parts are:
21 * Released into the Public Domain by Ulrich Drepper <drepper@redhat.com>.
22 * Shrank by Denys Vlasenko.
23 *
24 * ---------------------------------------------------------------------------
25 *
26 * The best way to test random blocksizes is to go to coreutils/md5_sha1_sum.c
27 * and replace "4096" with something like "2000 + time(NULL) % 2097",
28 * then rebuild and compare "shaNNNsum bigfile" results.
29 */
30
31#include "libbb.h"
32
/*
 * Bit rotations built from two complementary shifts.
 * The rotate count n must be strictly between 0 and the operand width,
 * because the second shift is by (width - n).  Both arguments are
 * evaluated more than once, so they must be free of side effects.
 * Intended for unsigned operands of exactly the stated width.
 */
#define rotl32(x,n) (((x) >> (32 - (n))) | ((x) << (n)))
#define rotr32(x,n) (((x) << (32 - (n))) | ((x) >> (n)))
/* 64-bit right rotation, for sha512: */
#define rotr64(x,n) (((x) << (64 - (n))) | ((x) >> (n)))
/*
 * hton64(): convert a 64-bit value from host to network (big-endian)
 * byte order.  When BB_LITTLE_ENDIAN is set, each 32-bit half goes
 * through htonl() and the two halves trade places; otherwise the
 * value is passed through unchanged.
 * ntoh64() is the same operation.
 */
#if BB_LITTLE_ENDIAN
static inline uint64_t hton64(uint64_t v)
{
	uint64_t swapped_lo = htonl((uint32_t)v);
	uint64_t swapped_hi = htonl((uint32_t)(v >> 32));

	return (swapped_lo << 32) | swapped_hi;
}
#else
#define hton64(v) (v)
#endif
#define ntoh64(v) hton64(v)
46
/* UNALIGNED_P(p, type): nonzero if pointer p is not suitably aligned
 * for an object of the given type.
 * To check alignment gcc has an appropriate operator.  Other
 * compilers don't, so sizeof(type) is used there instead.
 * The pointer argument is parenthesized so that expressions such as
 * "buf + 1" are evaluated as pointers before the cast to uintptr_t.
 */
#if defined(__GNUC__) && __GNUC__ >= 2
# define UNALIGNED_P(p,type) (((uintptr_t) (p)) % __alignof__(type) != 0)
#else
# define UNALIGNED_P(p,type) (((uintptr_t) (p)) % sizeof(type) != 0)
#endif
54
55
56/* Some arch headers have conflicting defines */
57#undef ch
58#undef parity
59#undef maj
60#undef rnd
61
62static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
63{
64	unsigned t;
65	uint32_t W[80], a, b, c, d, e;
66	const uint32_t *words = (uint32_t*) ctx->wbuffer;
67
68	for (t = 0; t < 16; ++t) {
69		W[t] = ntohl(*words);
70		words++;
71	}
72
73	for (/*t = 16*/; t < 80; ++t) {
74		uint32_t T = W[t - 3] ^ W[t - 8] ^ W[t - 14] ^ W[t - 16];
75		W[t] = rotl32(T, 1);
76	}
77
78	a = ctx->hash[0];
79	b = ctx->hash[1];
80	c = ctx->hash[2];
81	d = ctx->hash[3];
82	e = ctx->hash[4];
83
84/* Reverse byte order in 32-bit words   */
85#define ch(x,y,z)        ((z) ^ ((x) & ((y) ^ (z))))
86#define parity(x,y,z)    ((x) ^ (y) ^ (z))
87#define maj(x,y,z)       (((x) & (y)) | ((z) & ((x) | (y))))
88/* A normal version as set out in the FIPS. This version uses   */
89/* partial loop unrolling and is optimised for the Pentium 4    */
90#define rnd(f,k) \
91	do { \
92		uint32_t T = a; \
93		a = rotl32(a, 5) + f(b, c, d) + e + k + W[t]; \
94		e = d; \
95		d = c; \
96		c = rotl32(b, 30); \
97		b = T; \
98	} while (0)
99
100	for (t = 0; t < 20; ++t)
101		rnd(ch, 0x5a827999);
102
103	for (/*t = 20*/; t < 40; ++t)
104		rnd(parity, 0x6ed9eba1);
105
106	for (/*t = 40*/; t < 60; ++t)
107		rnd(maj, 0x8f1bbcdc);
108
109	for (/*t = 60*/; t < 80; ++t)
110		rnd(parity, 0xca62c1d6);
111#undef ch
112#undef parity
113#undef maj
114#undef rnd
115
116	ctx->hash[0] += a;
117	ctx->hash[1] += b;
118	ctx->hash[2] += c;
119	ctx->hash[3] += d;
120	ctx->hash[4] += e;
121}
122
/* Round constants shared by the SHA-256 and SHA-512 block functions.
 *
 * Constants for SHA512 from FIPS 180-2:4.2.3.
 * SHA256 constants from FIPS 180-2:4.2.2
 * are the most significant half of first 64 elements
 * of the same array (the SHA-256 code reads them as sha_K[t] >> 32).
 */
static const uint64_t sha_K[80] = {
	0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
	0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
	0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
	0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
	0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
	0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
	0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
	0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
	0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
	0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
	0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
	0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
	0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
	0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
	0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
	0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
	0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
	0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
	0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
	0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
	0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
	0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
	0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
	0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
	0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
	0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
	0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
	0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
	0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
	0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
	0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
	0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
	0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, /* [64]+ are used for sha512 only */
	0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
	0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
	0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
	0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
	0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
	0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
	0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
};
170
171#undef Ch
172#undef Maj
173#undef S0
174#undef S1
175#undef R0
176#undef R1
177
178static void FAST_FUNC sha256_process_block64(sha256_ctx_t *ctx)
179{
180	unsigned t;
181	uint32_t W[64], a, b, c, d, e, f, g, h;
182	const uint32_t *words = (uint32_t*) ctx->wbuffer;
183
184	/* Operators defined in FIPS 180-2:4.1.2.  */
185#define Ch(x, y, z) ((x & y) ^ (~x & z))
186#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
187#define S0(x) (rotr32(x, 2) ^ rotr32(x, 13) ^ rotr32(x, 22))
188#define S1(x) (rotr32(x, 6) ^ rotr32(x, 11) ^ rotr32(x, 25))
189#define R0(x) (rotr32(x, 7) ^ rotr32(x, 18) ^ (x >> 3))
190#define R1(x) (rotr32(x, 17) ^ rotr32(x, 19) ^ (x >> 10))
191
192	/* Compute the message schedule according to FIPS 180-2:6.2.2 step 2.  */
193	for (t = 0; t < 16; ++t) {
194		W[t] = ntohl(*words);
195		words++;
196	}
197
198	for (/*t = 16*/; t < 64; ++t)
199		W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
200
201	a = ctx->hash[0];
202	b = ctx->hash[1];
203	c = ctx->hash[2];
204	d = ctx->hash[3];
205	e = ctx->hash[4];
206	f = ctx->hash[5];
207	g = ctx->hash[6];
208	h = ctx->hash[7];
209
210	/* The actual computation according to FIPS 180-2:6.2.2 step 3.  */
211	for (t = 0; t < 64; ++t) {
212		/* Need to fetch upper half of sha_K[t]
213		 * (I hope compiler is clever enough to just fetch
214		 * upper half)
215		 */
216		uint32_t K_t = sha_K[t] >> 32;
217		uint32_t T1 = h + S1(e) + Ch(e, f, g) + K_t + W[t];
218		uint32_t T2 = S0(a) + Maj(a, b, c);
219		h = g;
220		g = f;
221		f = e;
222		e = d + T1;
223		d = c;
224		c = b;
225		b = a;
226		a = T1 + T2;
227	}
228#undef Ch
229#undef Maj
230#undef S0
231#undef S1
232#undef R0
233#undef R1
234	/* Add the starting values of the context according to FIPS 180-2:6.2.2
235	   step 4.  */
236	ctx->hash[0] += a;
237	ctx->hash[1] += b;
238	ctx->hash[2] += c;
239	ctx->hash[3] += d;
240	ctx->hash[4] += e;
241	ctx->hash[5] += f;
242	ctx->hash[6] += g;
243	ctx->hash[7] += h;
244}
245
246static void FAST_FUNC sha512_process_block128(sha512_ctx_t *ctx)
247{
248	unsigned t;
249	uint64_t W[80];
250	/* On i386, having assignments here (not later as sha256 does)
251	 * produces 99 bytes smaller code with gcc 4.3.1
252	 */
253	uint64_t a = ctx->hash[0];
254	uint64_t b = ctx->hash[1];
255	uint64_t c = ctx->hash[2];
256	uint64_t d = ctx->hash[3];
257	uint64_t e = ctx->hash[4];
258	uint64_t f = ctx->hash[5];
259	uint64_t g = ctx->hash[6];
260	uint64_t h = ctx->hash[7];
261	const uint64_t *words = (uint64_t*) ctx->wbuffer;
262
263	/* Operators defined in FIPS 180-2:4.1.2.  */
264#define Ch(x, y, z) ((x & y) ^ (~x & z))
265#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
266#define S0(x) (rotr64(x, 28) ^ rotr64(x, 34) ^ rotr64(x, 39))
267#define S1(x) (rotr64(x, 14) ^ rotr64(x, 18) ^ rotr64(x, 41))
268#define R0(x) (rotr64(x, 1) ^ rotr64(x, 8) ^ (x >> 7))
269#define R1(x) (rotr64(x, 19) ^ rotr64(x, 61) ^ (x >> 6))
270
271	/* Compute the message schedule according to FIPS 180-2:6.3.2 step 2.  */
272	for (t = 0; t < 16; ++t) {
273		W[t] = ntoh64(*words);
274		words++;
275	}
276	for (/*t = 16*/; t < 80; ++t)
277		W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
278
279	/* The actual computation according to FIPS 180-2:6.3.2 step 3.  */
280	for (t = 0; t < 80; ++t) {
281		uint64_t T1 = h + S1(e) + Ch(e, f, g) + sha_K[t] + W[t];
282		uint64_t T2 = S0(a) + Maj(a, b, c);
283		h = g;
284		g = f;
285		f = e;
286		e = d + T1;
287		d = c;
288		c = b;
289		b = a;
290		a = T1 + T2;
291	}
292#undef Ch
293#undef Maj
294#undef S0
295#undef S1
296#undef R0
297#undef R1
298	/* Add the starting values of the context according to FIPS 180-2:6.3.2
299	   step 4.  */
300	ctx->hash[0] += a;
301	ctx->hash[1] += b;
302	ctx->hash[2] += c;
303	ctx->hash[3] += d;
304	ctx->hash[4] += e;
305	ctx->hash[5] += f;
306	ctx->hash[6] += g;
307	ctx->hash[7] += h;
308}
309
310
311void FAST_FUNC sha1_begin(sha1_ctx_t *ctx)
312{
313	ctx->hash[0] = 0x67452301;
314	ctx->hash[1] = 0xefcdab89;
315	ctx->hash[2] = 0x98badcfe;
316	ctx->hash[3] = 0x10325476;
317	ctx->hash[4] = 0xc3d2e1f0;
318	ctx->total64 = 0;
319	ctx->process_block = sha1_process_block64;
320}
321
/* Initial hash words for SHA-256 (copied into the context by
 * sha256_begin).  sha512_begin also uses each entry as the upper
 * 32 bits of the corresponding SHA-512 initial hash word.
 */
static const uint32_t init256[] = {
	0x6a09e667,
	0xbb67ae85,
	0x3c6ef372,
	0xa54ff53a,
	0x510e527f,
	0x9b05688c,
	0x1f83d9ab,
	0x5be0cd19
};
/* Lower 32 bits of the SHA-512 initial hash words; sha512_begin
 * combines entry i with init256[i] to form the full 64-bit value.
 */
static const uint32_t init512_lo[] = {
	0xf3bcc908,
	0x84caa73b,
	0xfe94f82b,
	0x5f1d36f1,
	0xade682d1,
	0x2b3e6c1f,
	0xfb41bd6b,
	0x137e2179
};
342
343/* Initialize structure containing state of computation.
344   (FIPS 180-2:5.3.2)  */
345void FAST_FUNC sha256_begin(sha256_ctx_t *ctx)
346{
347	memcpy(ctx->hash, init256, sizeof(init256));
348	ctx->total64 = 0;
349	ctx->process_block = sha256_process_block64;
350}
351
352/* Initialize structure containing state of computation.
353   (FIPS 180-2:5.3.3)  */
354void FAST_FUNC sha512_begin(sha512_ctx_t *ctx)
355{
356	int i;
357	for (i = 0; i < 8; i++)
358		ctx->hash[i] = ((uint64_t)(init256[i]) << 32) + init512_lo[i];
359	ctx->total64[0] = ctx->total64[1] = 0;
360}
361
362
363/* Used also for sha256 */
364void FAST_FUNC sha1_hash(const void *buffer, size_t len, sha1_ctx_t *ctx)
365{
366	unsigned in_buf = ctx->total64 & 63;
367	unsigned add = 64 - in_buf;
368
369	ctx->total64 += len;
370
371	while (len >= add) {	/* transfer whole blocks while possible  */
372		memcpy(ctx->wbuffer + in_buf, buffer, add);
373		buffer = (const char *)buffer + add;
374		len -= add;
375		add = 64;
376		in_buf = 0;
377		ctx->process_block(ctx);
378	}
379
380	memcpy(ctx->wbuffer + in_buf, buffer, len);
381}
382
383void FAST_FUNC sha512_hash(const void *buffer, size_t len, sha512_ctx_t *ctx)
384{
385	unsigned in_buf = ctx->total64[0] & 127;
386	unsigned add = 128 - in_buf;
387
388	/* First increment the byte count.  FIPS 180-2 specifies the possible
389	   length of the file up to 2^128 _bits_.
390	   We compute the number of _bytes_ and convert to bits later.  */
391	ctx->total64[0] += len;
392	if (ctx->total64[0] < len)
393		ctx->total64[1]++;
394
395	while (len >= add) {	/* transfer whole blocks while possible  */
396		memcpy(ctx->wbuffer + in_buf, buffer, add);
397		buffer = (const char *)buffer + add;
398		len -= add;
399		add = 128;
400		in_buf = 0;
401		sha512_process_block128(ctx);
402	}
403
404	memcpy(ctx->wbuffer + in_buf, buffer, len);
405}
406
407
408/* Used also for sha256 */
409void FAST_FUNC sha1_end(void *resbuf, sha1_ctx_t *ctx)
410{
411	unsigned pad, in_buf;
412
413	in_buf = ctx->total64 & 63;
414	/* Pad the buffer to the next 64-byte boundary with 0x80,0,0,0... */
415	ctx->wbuffer[in_buf++] = 0x80;
416
417	/* This loop iterates either once or twice, no more, no less */
418	while (1) {
419		pad = 64 - in_buf;
420		memset(ctx->wbuffer + in_buf, 0, pad);
421		in_buf = 0;
422		/* Do we have enough space for the length count? */
423		if (pad >= 8) {
424			/* Store the 64-bit counter of bits in the buffer in BE format */
425			uint64_t t = ctx->total64 << 3;
426			t = hton64(t);
427			/* wbuffer is suitably aligned for this */
428			*(uint64_t *) (&ctx->wbuffer[64 - 8]) = t;
429		}
430		ctx->process_block(ctx);
431		if (pad >= 8)
432			break;
433	}
434
435	in_buf = (ctx->process_block == sha1_process_block64) ? 5 : 8;
436	/* This way we do not impose alignment constraints on resbuf: */
437	if (BB_LITTLE_ENDIAN) {
438		unsigned i;
439		for (i = 0; i < in_buf; ++i)
440			ctx->hash[i] = htonl(ctx->hash[i]);
441	}
442	memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * in_buf);
443}
444
445void FAST_FUNC sha512_end(void *resbuf, sha512_ctx_t *ctx)
446{
447	unsigned pad, in_buf;
448
449	in_buf = ctx->total64[0] & 127;
450	/* Pad the buffer to the next 128-byte boundary with 0x80,0,0,0...
451	 * (FIPS 180-2:5.1.2)
452	 */
453	ctx->wbuffer[in_buf++] = 0x80;
454
455	while (1) {
456		pad = 128 - in_buf;
457		memset(ctx->wbuffer + in_buf, 0, pad);
458		in_buf = 0;
459		if (pad >= 16) {
460			/* Store the 128-bit counter of bits in the buffer in BE format */
461			uint64_t t;
462			t = ctx->total64[0] << 3;
463			t = hton64(t);
464			*(uint64_t *) (&ctx->wbuffer[128 - 8]) = t;
465			t = (ctx->total64[1] << 3) | (ctx->total64[0] >> 61);
466			t = hton64(t);
467			*(uint64_t *) (&ctx->wbuffer[128 - 16]) = t;
468		}
469		sha512_process_block128(ctx);
470		if (pad >= 16)
471			break;
472	}
473
474	if (BB_LITTLE_ENDIAN) {
475		unsigned i;
476		for (i = 0; i < ARRAY_SIZE(ctx->hash); ++i)
477			ctx->hash[i] = hton64(ctx->hash[i]);
478	}
479	memcpy(resbuf, ctx->hash, sizeof(ctx->hash));
480}
481