1/*
2 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
3 * Use is subject to license terms.
4 */
5/*
6 * Copyright 2013 Saso Kiselkov.  All rights reserved.
7 */
8
9/*
10 * The basic framework for this code came from the reference
11 * implementation for MD5.  That implementation is Copyright (C)
12 * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
13 *
14 * License to copy and use this software is granted provided that it
15 * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
16 * Algorithm" in all material mentioning or referencing this software
17 * or this function.
18 *
19 * License is also granted to make and use derivative works provided
20 * that such works are identified as "derived from the RSA Data
21 * Security, Inc. MD5 Message-Digest Algorithm" in all material
22 * mentioning or referencing the derived work.
23 *
24 * RSA Data Security, Inc. makes no representations concerning either
25 * the merchantability of this software or the suitability of this
26 * software for any particular purpose. It is provided "as is"
27 * without express or implied warranty of any kind.
28 *
29 * These notices must be retained in any copies of any part of this
30 * documentation and/or software.
31 *
32 * NOTE: Cleaned-up and optimized, version of SHA2, based on the FIPS 180-2
33 * standard, available at
34 * http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf
35 * Not as fast as one would like -- further optimizations are encouraged
36 * and appreciated.
37 */
38
39#include <sys/zfs_context.h>
40#define	_SHA2_IMPL
41#include <sys/sha2.h>
42#include <sha2/sha2_consts.h>
43
44#define	_RESTRICT_KYWD
45
46#ifdef _ZFS_LITTLE_ENDIAN
47#include <sys/byteorder.h>
48#define	HAVE_HTONL
49#endif
50#include <sys/isa_defs.h>	/* for _ILP32 */
51
/*
 * Byte-order serializers for arrays of 32-bit and 64-bit words; both are
 * defined further down in this file.
 */
static void Encode(uint8_t *, uint32_t *, size_t);
static void Encode64(uint8_t *, uint64_t *, size_t);

/* userspace only supports the generic version */
#if	defined(__amd64) && defined(_KERNEL)
/*
 * amd64 kernel builds: the single-block transforms become macros that
 * pass a block count of 1 to the multi-block routines declared below.
 * NOTE(review): those routines are not defined in this part of the file;
 * presumably they are a platform-optimized implementation -- confirm.
 */
#define	SHA512Transform(ctx, in) SHA512TransformBlocks((ctx), (in), 1)
#define	SHA256Transform(ctx, in) SHA256TransformBlocks((ctx), (in), 1)

void SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
void SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);

#else
/* Every other build uses the C transforms defined in this file. */
static void SHA256Transform(SHA2_CTX *, const uint8_t *);
static void SHA512Transform(SHA2_CTX *, const uint8_t *);
#endif	/* __amd64 && _KERNEL */
67
/*
 * 128-byte block whose first byte is 0x80 and whose remaining bytes are
 * zero-initialized.
 * NOTE(review): presumably the message padding appended when a digest is
 * finalized; the code that consumes it is not in this part of the file.
 */
static uint8_t PADDING[128] = { 0x80, /* all zeros */ };
69
/*
 * The low-level checksum routines use a lot of stack space. On systems where
 * small stacks are enforced (like 32-bit kernel builds), insert compiler memory
 * barriers to reduce stack frame size. This can reduce the SHA512Transform()
 * stack frame usage from 3k to <1k on ARM32, for example.
 *
 * Note that the small-stack expansion already ends in a semicolon, and that
 * the macro expands to nothing on every other platform; a use site that adds
 * its own ';' therefore produces one extra empty statement.
 */
#if defined(_ILP32) || defined(__powerpc)	/* small stack */
#define	SMALL_STACK_MEMORY_BARRIER	asm volatile("": : :"memory");
#else
#define	SMALL_STACK_MEMORY_BARRIER
#endif
81
/*
 * Ch and Maj are the basic SHA2 functions.
 *
 * Ch(b, c, d):  bitwise "choose" -- each result bit comes from c where the
 *               corresponding bit of b is 1, and from d where it is 0.
 * Maj(b, c, d): bitwise majority of the three operands.
 *
 * Every argument is parenthesized so that compound expressions can be
 * passed safely; note that each argument is evaluated more than once.
 */
#define	Ch(b, c, d)	(((b) & (c)) ^ ((~(b)) & (d)))
#define	Maj(b, c, d)	(((b) & (c)) ^ ((b) & (d)) ^ ((c) & (d)))
85
/* Logical right shift of x by n bits. */
#define	SHR(x, n)	((x) >> (n))

/*
 * Right rotation of x by n bits.  The width is taken from sizeof (x), so
 * the same macro serves 32-bit and 64-bit operands.  n must be greater
 * than zero and smaller than the width of x, otherwise the left shift
 * count is out of range.
 */
#define	ROTR(x, n)	\
	(SHR((x), (n)) | ((x) << (NBBY * sizeof (x) - (n))))
92
/* SHA256 Functions: rotate/shift mixes applied to 32-bit words. */
#define	BIGSIGMA0_256(x)	(ROTR((x), 2) ^ ROTR((x), 13) ^ ROTR((x), 22))
#define	BIGSIGMA1_256(x)	(ROTR((x), 6) ^ ROTR((x), 11) ^ ROTR((x), 25))
#define	SIGMA0_256(x)		(ROTR((x), 7) ^ ROTR((x), 18) ^ SHR((x), 3))
#define	SIGMA1_256(x)		(ROTR((x), 17) ^ ROTR((x), 19) ^ SHR((x), 10))

/*
 * One SHA-256 round.
 *
 * a..h are the eight working variables (d and h are written, the others
 * are only read), i is the round number handed to SHA256_CONST() and w is
 * the message-schedule word for this round.  T1 and T2 are scratch
 * variables that must be declared in the calling scope.
 *
 * i is passed to SHA256_CONST() untouched (not parenthesized) because that
 * macro is defined in another header and may paste it into an identifier.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed
 * by ';' is exactly one statement, even under an unbraced if/else.
 */
#define	SHA256ROUND(a, b, c, d, e, f, g, h, i, w)			\
	do {								\
		T1 = (h) + BIGSIGMA1_256(e) + Ch(e, f, g) +		\
		    SHA256_CONST(i) + (w);				\
		d += T1;						\
		T2 = BIGSIGMA0_256(a) + Maj(a, b, c);			\
		h = T1 + T2;						\
	} while (0)
104
/* SHA384/512 Functions: rotate/shift mixes applied to 64-bit words. */
#define	BIGSIGMA0(x)	(ROTR((x), 28) ^ ROTR((x), 34) ^ ROTR((x), 39))
#define	BIGSIGMA1(x)	(ROTR((x), 14) ^ ROTR((x), 18) ^ ROTR((x), 41))
#define	SIGMA0(x)	(ROTR((x), 1) ^ ROTR((x), 8) ^ SHR((x), 7))
#define	SIGMA1(x)	(ROTR((x), 19) ^ ROTR((x), 61) ^ SHR((x), 6))

/*
 * One SHA-384/512 round.
 *
 * a..h are the eight working variables (d and h are written, the others
 * are only read), i is the round number handed to SHA512_CONST() and w is
 * the message-schedule word for this round.  T1 and T2 are scratch
 * variables that must be declared in the calling scope.
 *
 * i is passed to SHA512_CONST() untouched (not parenthesized) because that
 * macro is defined in another header and may paste it into an identifier.
 *
 * Each round ends with SMALL_STACK_MEMORY_BARRIER.  The body is wrapped in
 * do { } while (0) so that an invocation followed by ';' is exactly one
 * statement, even under an unbraced if/else.
 */
#define	SHA512ROUND(a, b, c, d, e, f, g, h, i, w)			\
	do {								\
		T1 = (h) + BIGSIGMA1(e) + Ch(e, f, g) +			\
		    SHA512_CONST(i) + (w);				\
		d += T1;						\
		T2 = BIGSIGMA0(a) + Maj(a, b, c);			\
		h = T1 + T2;						\
		SMALL_STACK_MEMORY_BARRIER;				\
	} while (0)
116
117/*
118 * sparc optimization:
119 *
120 * on the sparc, we can load big endian 32-bit data easily.  note that
121 * special care must be taken to ensure the address is 32-bit aligned.
122 * in the interest of speed, we don't check to make sure, since
123 * careful programming can guarantee this for us.
124 */
125
/*
 * LOAD_BIG_32(addr) / LOAD_BIG_64(addr): fetch a big-endian 32-bit or
 * 64-bit word starting at byte address addr.
 *
 * The first two variants dereference addr as a word pointer, so addr must
 * be suitably aligned.  The last variant assembles the word byte by byte
 * and has no alignment requirement; addr must then be a pointer to bytes.
 */
#if	defined(_ZFS_BIG_ENDIAN)
/* host order is already big endian: a plain load is enough */
#define	LOAD_BIG_32(addr)	(*(uint32_t *)(addr))
#define	LOAD_BIG_64(addr)	(*(uint64_t *)(addr))

#elif	defined(HAVE_HTONL)
/* load in host order, then byte-swap */
#define	LOAD_BIG_32(addr) htonl(*((uint32_t *)(addr)))
#define	LOAD_BIG_64(addr) htonll(*((uint64_t *)(addr)))

#else
/* little endian -- will work on big endian, but slowly */
/*
 * Each byte is widened to the result type before it is shifted.  Without
 * the cast the byte is promoted to (signed) int, and shifting a value
 * >= 0x80 left by 24 overflows int.
 */
#define	LOAD_BIG_32(addr)	\
	(((uint32_t)(addr)[0] << 24) | ((uint32_t)(addr)[1] << 16) |	\
	    ((uint32_t)(addr)[2] << 8) | (uint32_t)(addr)[3])
#define	LOAD_BIG_64(addr)	\
	(((uint64_t)(addr)[0] << 56) | ((uint64_t)(addr)[1] << 48) |	\
	    ((uint64_t)(addr)[2] << 40) | ((uint64_t)(addr)[3] << 32) |	\
	    ((uint64_t)(addr)[4] << 24) | ((uint64_t)(addr)[5] << 16) |	\
	    ((uint64_t)(addr)[6] << 8) | (uint64_t)(addr)[7])
#endif	/* _ZFS_BIG_ENDIAN */
144
145
146#if	!defined(__amd64) || !defined(_KERNEL)
/*
 * SHA256Transform()
 *
 * purpose: fold one 64-byte message block into the eight 32-bit state
 *          words held in ctx->state.s32[].
 *   input: SHA2_CTX *	: context whose state words are updated in place
 *          uint8_t *	: the block, read as sixteen big-endian 32-bit words
 *  output: void
 *
 * All 64 rounds are written out explicitly.  Instead of shuffling the
 * working variables after every round, each SHA256ROUND() invocation
 * passes them rotated by one position, so the names return to their
 * starting order every eight rounds.
 */

static void
SHA256Transform(SHA2_CTX *ctx, const uint8_t *blk)
{
	/* working variables, seeded from the current state */
	uint32_t a = ctx->state.s32[0];
	uint32_t b = ctx->state.s32[1];
	uint32_t c = ctx->state.s32[2];
	uint32_t d = ctx->state.s32[3];
	uint32_t e = ctx->state.s32[4];
	uint32_t f = ctx->state.s32[5];
	uint32_t g = ctx->state.s32[6];
	uint32_t h = ctx->state.s32[7];

	/* rolling 16-word window of the message schedule */
	uint32_t w0, w1, w2, w3, w4, w5, w6, w7;
	uint32_t w8, w9, w10, w11, w12, w13, w14, w15;
	/* scratch variables written by SHA256ROUND() */
	uint32_t T1, T2;

#if	defined(__sparc)
	/*
	 * Table of the 64 round constants, built only on sparc.
	 * NOTE(review): presumably this is what SHA256_CONST(i) indexes on
	 * sparc; that macro is defined in sha2/sha2_consts.h -- confirm.
	 */
	static const uint32_t sha256_consts[] = {
		SHA256_CONST_0, SHA256_CONST_1, SHA256_CONST_2,
		SHA256_CONST_3, SHA256_CONST_4, SHA256_CONST_5,
		SHA256_CONST_6, SHA256_CONST_7, SHA256_CONST_8,
		SHA256_CONST_9, SHA256_CONST_10, SHA256_CONST_11,
		SHA256_CONST_12, SHA256_CONST_13, SHA256_CONST_14,
		SHA256_CONST_15, SHA256_CONST_16, SHA256_CONST_17,
		SHA256_CONST_18, SHA256_CONST_19, SHA256_CONST_20,
		SHA256_CONST_21, SHA256_CONST_22, SHA256_CONST_23,
		SHA256_CONST_24, SHA256_CONST_25, SHA256_CONST_26,
		SHA256_CONST_27, SHA256_CONST_28, SHA256_CONST_29,
		SHA256_CONST_30, SHA256_CONST_31, SHA256_CONST_32,
		SHA256_CONST_33, SHA256_CONST_34, SHA256_CONST_35,
		SHA256_CONST_36, SHA256_CONST_37, SHA256_CONST_38,
		SHA256_CONST_39, SHA256_CONST_40, SHA256_CONST_41,
		SHA256_CONST_42, SHA256_CONST_43, SHA256_CONST_44,
		SHA256_CONST_45, SHA256_CONST_46, SHA256_CONST_47,
		SHA256_CONST_48, SHA256_CONST_49, SHA256_CONST_50,
		SHA256_CONST_51, SHA256_CONST_52, SHA256_CONST_53,
		SHA256_CONST_54, SHA256_CONST_55, SHA256_CONST_56,
		SHA256_CONST_57, SHA256_CONST_58, SHA256_CONST_59,
		SHA256_CONST_60, SHA256_CONST_61, SHA256_CONST_62,
		SHA256_CONST_63
	};
#endif	/* __sparc */

	/*
	 * Two of the three LOAD_BIG_32() variants read through a uint32_t
	 * pointer, so a misaligned block is first staged in the context
	 * buffer.  This overwrites the contents of ctx->buf_un.
	 */
	if ((uintptr_t)blk & 0x3) {		/* not 4-byte aligned? */
		bcopy(blk, ctx->buf_un.buf32,  sizeof (ctx->buf_un.buf32));
		blk = (uint8_t *)ctx->buf_un.buf32;
	}

	/* rounds 0-15: the schedule words are the sixteen block words */
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w0 =  LOAD_BIG_32(blk + 4 * 0);
	SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w1 =  LOAD_BIG_32(blk + 4 * 1);
	SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w2 =  LOAD_BIG_32(blk + 4 * 2);
	SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w3 =  LOAD_BIG_32(blk + 4 * 3);
	SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w4 =  LOAD_BIG_32(blk + 4 * 4);
	SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w5 =  LOAD_BIG_32(blk + 4 * 5);
	SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w6 =  LOAD_BIG_32(blk + 4 * 6);
	SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w7 =  LOAD_BIG_32(blk + 4 * 7);
	SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w8 =  LOAD_BIG_32(blk + 4 * 8);
	SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w9 =  LOAD_BIG_32(blk + 4 * 9);
	SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w10 =  LOAD_BIG_32(blk + 4 * 10);
	SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w11 =  LOAD_BIG_32(blk + 4 * 11);
	SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w12 =  LOAD_BIG_32(blk + 4 * 12);
	SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w13 =  LOAD_BIG_32(blk + 4 * 13);
	SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w14 =  LOAD_BIG_32(blk + 4 * 14);
	SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w15 =  LOAD_BIG_32(blk + 4 * 15);
	SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);

	/*
	 * rounds 16-31: from here on every schedule word is derived from
	 * four earlier ones and stored over the window slot it replaces
	 */
	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
	SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
	SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
	SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
	SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
	SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
	SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
	SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
	SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
	SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
	SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
	SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
	SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
	SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
	SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
	SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
	SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);

	/* rounds 32-47 */
	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
	SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
	SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
	SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
	SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
	SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
	SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
	SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
	SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
	SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
	SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
	SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
	SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
	SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
	SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
	SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
	SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);

	/* rounds 48-63 */
	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
	SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
	SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
	SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
	SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
	SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
	SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
	SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
	SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
	SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
	SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
	SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
	SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
	SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
	SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
	SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
	SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);

	/* add the working variables back into the state */
	ctx->state.s32[0] += a;
	ctx->state.s32[1] += b;
	ctx->state.s32[2] += c;
	ctx->state.s32[3] += d;
	ctx->state.s32[4] += e;
	ctx->state.s32[5] += f;
	ctx->state.s32[6] += g;
	ctx->state.s32[7] += h;
}
354
355
/*
 * SHA512Transform()
 *
 * purpose: fold one 128-byte message block into the eight 64-bit state
 *          words held in ctx->state.s64[].  Used for every mechanism that
 *          keeps 64-bit state (SHA384, SHA512 and the SHA512 variants).
 *   input: SHA2_CTX *	: context whose state words are updated in place
 *          uint8_t *	: the block, read as sixteen big-endian 64-bit words
 *  output: void
 *
 * All 80 rounds are written out explicitly.  Instead of shuffling the
 * working variables after every round, each SHA512ROUND() invocation
 * passes them rotated by one position, so the names return to their
 * starting order every eight rounds.
 */

static void
SHA512Transform(SHA2_CTX *ctx, const uint8_t *blk)
{

	/* working variables, seeded from the current state */
	uint64_t a = ctx->state.s64[0];
	uint64_t b = ctx->state.s64[1];
	uint64_t c = ctx->state.s64[2];
	uint64_t d = ctx->state.s64[3];
	uint64_t e = ctx->state.s64[4];
	uint64_t f = ctx->state.s64[5];
	uint64_t g = ctx->state.s64[6];
	uint64_t h = ctx->state.s64[7];

	/* rolling 16-word window of the message schedule */
	uint64_t w0, w1, w2, w3, w4, w5, w6, w7;
	uint64_t w8, w9, w10, w11, w12, w13, w14, w15;
	/* scratch variables written by SHA512ROUND() */
	uint64_t T1, T2;

#if	defined(__sparc)
	/*
	 * Table of the 80 round constants, built only on sparc.
	 * NOTE(review): presumably this is what SHA512_CONST(i) indexes on
	 * sparc; that macro is defined in sha2/sha2_consts.h -- confirm.
	 */
	static const uint64_t sha512_consts[] = {
		SHA512_CONST_0, SHA512_CONST_1, SHA512_CONST_2,
		SHA512_CONST_3, SHA512_CONST_4, SHA512_CONST_5,
		SHA512_CONST_6, SHA512_CONST_7, SHA512_CONST_8,
		SHA512_CONST_9, SHA512_CONST_10, SHA512_CONST_11,
		SHA512_CONST_12, SHA512_CONST_13, SHA512_CONST_14,
		SHA512_CONST_15, SHA512_CONST_16, SHA512_CONST_17,
		SHA512_CONST_18, SHA512_CONST_19, SHA512_CONST_20,
		SHA512_CONST_21, SHA512_CONST_22, SHA512_CONST_23,
		SHA512_CONST_24, SHA512_CONST_25, SHA512_CONST_26,
		SHA512_CONST_27, SHA512_CONST_28, SHA512_CONST_29,
		SHA512_CONST_30, SHA512_CONST_31, SHA512_CONST_32,
		SHA512_CONST_33, SHA512_CONST_34, SHA512_CONST_35,
		SHA512_CONST_36, SHA512_CONST_37, SHA512_CONST_38,
		SHA512_CONST_39, SHA512_CONST_40, SHA512_CONST_41,
		SHA512_CONST_42, SHA512_CONST_43, SHA512_CONST_44,
		SHA512_CONST_45, SHA512_CONST_46, SHA512_CONST_47,
		SHA512_CONST_48, SHA512_CONST_49, SHA512_CONST_50,
		SHA512_CONST_51, SHA512_CONST_52, SHA512_CONST_53,
		SHA512_CONST_54, SHA512_CONST_55, SHA512_CONST_56,
		SHA512_CONST_57, SHA512_CONST_58, SHA512_CONST_59,
		SHA512_CONST_60, SHA512_CONST_61, SHA512_CONST_62,
		SHA512_CONST_63, SHA512_CONST_64, SHA512_CONST_65,
		SHA512_CONST_66, SHA512_CONST_67, SHA512_CONST_68,
		SHA512_CONST_69, SHA512_CONST_70, SHA512_CONST_71,
		SHA512_CONST_72, SHA512_CONST_73, SHA512_CONST_74,
		SHA512_CONST_75, SHA512_CONST_76, SHA512_CONST_77,
		SHA512_CONST_78, SHA512_CONST_79
	};
#endif	/* __sparc */


	/*
	 * Two of the three LOAD_BIG_64() variants read through a uint64_t
	 * pointer, so a misaligned block is first staged in the context
	 * buffer.  This overwrites the contents of ctx->buf_un.
	 */
	if ((uintptr_t)blk & 0x7) {		/* not 8-byte aligned? */
		bcopy(blk, ctx->buf_un.buf64,  sizeof (ctx->buf_un.buf64));
		blk = (uint8_t *)ctx->buf_un.buf64;
	}

	/* rounds 0-15: the schedule words are the sixteen block words */
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w0 =  LOAD_BIG_64(blk + 8 * 0);
	SHA512ROUND(a, b, c, d, e, f, g, h, 0, w0);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w1 =  LOAD_BIG_64(blk + 8 * 1);
	SHA512ROUND(h, a, b, c, d, e, f, g, 1, w1);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w2 =  LOAD_BIG_64(blk + 8 * 2);
	SHA512ROUND(g, h, a, b, c, d, e, f, 2, w2);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w3 =  LOAD_BIG_64(blk + 8 * 3);
	SHA512ROUND(f, g, h, a, b, c, d, e, 3, w3);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w4 =  LOAD_BIG_64(blk + 8 * 4);
	SHA512ROUND(e, f, g, h, a, b, c, d, 4, w4);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w5 =  LOAD_BIG_64(blk + 8 * 5);
	SHA512ROUND(d, e, f, g, h, a, b, c, 5, w5);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w6 =  LOAD_BIG_64(blk + 8 * 6);
	SHA512ROUND(c, d, e, f, g, h, a, b, 6, w6);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w7 =  LOAD_BIG_64(blk + 8 * 7);
	SHA512ROUND(b, c, d, e, f, g, h, a, 7, w7);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w8 =  LOAD_BIG_64(blk + 8 * 8);
	SHA512ROUND(a, b, c, d, e, f, g, h, 8, w8);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w9 =  LOAD_BIG_64(blk + 8 * 9);
	SHA512ROUND(h, a, b, c, d, e, f, g, 9, w9);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w10 =  LOAD_BIG_64(blk + 8 * 10);
	SHA512ROUND(g, h, a, b, c, d, e, f, 10, w10);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w11 =  LOAD_BIG_64(blk + 8 * 11);
	SHA512ROUND(f, g, h, a, b, c, d, e, 11, w11);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w12 =  LOAD_BIG_64(blk + 8 * 12);
	SHA512ROUND(e, f, g, h, a, b, c, d, 12, w12);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w13 =  LOAD_BIG_64(blk + 8 * 13);
	SHA512ROUND(d, e, f, g, h, a, b, c, 13, w13);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w14 =  LOAD_BIG_64(blk + 8 * 14);
	SHA512ROUND(c, d, e, f, g, h, a, b, 14, w14);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w15 =  LOAD_BIG_64(blk + 8 * 15);
	SHA512ROUND(b, c, d, e, f, g, h, a, 15, w15);

	/*
	 * rounds 16-31: from here on every schedule word is derived from
	 * four earlier ones and stored over the window slot it replaces
	 */
	w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
	SHA512ROUND(a, b, c, d, e, f, g, h, 16, w0);
	w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
	SHA512ROUND(h, a, b, c, d, e, f, g, 17, w1);
	w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
	SHA512ROUND(g, h, a, b, c, d, e, f, 18, w2);
	w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
	SHA512ROUND(f, g, h, a, b, c, d, e, 19, w3);
	w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
	SHA512ROUND(e, f, g, h, a, b, c, d, 20, w4);
	w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
	SHA512ROUND(d, e, f, g, h, a, b, c, 21, w5);
	w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
	SHA512ROUND(c, d, e, f, g, h, a, b, 22, w6);
	w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
	SHA512ROUND(b, c, d, e, f, g, h, a, 23, w7);
	w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
	SHA512ROUND(a, b, c, d, e, f, g, h, 24, w8);
	w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
	SHA512ROUND(h, a, b, c, d, e, f, g, 25, w9);
	w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
	SHA512ROUND(g, h, a, b, c, d, e, f, 26, w10);
	w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
	SHA512ROUND(f, g, h, a, b, c, d, e, 27, w11);
	w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
	SHA512ROUND(e, f, g, h, a, b, c, d, 28, w12);
	w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
	SHA512ROUND(d, e, f, g, h, a, b, c, 29, w13);
	w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
	SHA512ROUND(c, d, e, f, g, h, a, b, 30, w14);
	w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
	SHA512ROUND(b, c, d, e, f, g, h, a, 31, w15);

	/* rounds 32-47 */
	w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
	SHA512ROUND(a, b, c, d, e, f, g, h, 32, w0);
	w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
	SHA512ROUND(h, a, b, c, d, e, f, g, 33, w1);
	w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
	SHA512ROUND(g, h, a, b, c, d, e, f, 34, w2);
	w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
	SHA512ROUND(f, g, h, a, b, c, d, e, 35, w3);
	w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
	SHA512ROUND(e, f, g, h, a, b, c, d, 36, w4);
	w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
	SHA512ROUND(d, e, f, g, h, a, b, c, 37, w5);
	w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
	SHA512ROUND(c, d, e, f, g, h, a, b, 38, w6);
	w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
	SHA512ROUND(b, c, d, e, f, g, h, a, 39, w7);
	w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
	SHA512ROUND(a, b, c, d, e, f, g, h, 40, w8);
	w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
	SHA512ROUND(h, a, b, c, d, e, f, g, 41, w9);
	w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
	SHA512ROUND(g, h, a, b, c, d, e, f, 42, w10);
	w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
	SHA512ROUND(f, g, h, a, b, c, d, e, 43, w11);
	w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
	SHA512ROUND(e, f, g, h, a, b, c, d, 44, w12);
	w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
	SHA512ROUND(d, e, f, g, h, a, b, c, 45, w13);
	w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
	SHA512ROUND(c, d, e, f, g, h, a, b, 46, w14);
	w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
	SHA512ROUND(b, c, d, e, f, g, h, a, 47, w15);

	/* rounds 48-63 */
	w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
	SHA512ROUND(a, b, c, d, e, f, g, h, 48, w0);
	w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
	SHA512ROUND(h, a, b, c, d, e, f, g, 49, w1);
	w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
	SHA512ROUND(g, h, a, b, c, d, e, f, 50, w2);
	w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
	SHA512ROUND(f, g, h, a, b, c, d, e, 51, w3);
	w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
	SHA512ROUND(e, f, g, h, a, b, c, d, 52, w4);
	w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
	SHA512ROUND(d, e, f, g, h, a, b, c, 53, w5);
	w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
	SHA512ROUND(c, d, e, f, g, h, a, b, 54, w6);
	w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
	SHA512ROUND(b, c, d, e, f, g, h, a, 55, w7);
	w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
	SHA512ROUND(a, b, c, d, e, f, g, h, 56, w8);
	w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
	SHA512ROUND(h, a, b, c, d, e, f, g, 57, w9);
	w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
	SHA512ROUND(g, h, a, b, c, d, e, f, 58, w10);
	w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
	SHA512ROUND(f, g, h, a, b, c, d, e, 59, w11);
	w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
	SHA512ROUND(e, f, g, h, a, b, c, d, 60, w12);
	w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
	SHA512ROUND(d, e, f, g, h, a, b, c, 61, w13);
	w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
	SHA512ROUND(c, d, e, f, g, h, a, b, 62, w14);
	w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
	SHA512ROUND(b, c, d, e, f, g, h, a, 63, w15);

	/* rounds 64-79 */
	w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
	SHA512ROUND(a, b, c, d, e, f, g, h, 64, w0);
	w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
	SHA512ROUND(h, a, b, c, d, e, f, g, 65, w1);
	w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
	SHA512ROUND(g, h, a, b, c, d, e, f, 66, w2);
	w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
	SHA512ROUND(f, g, h, a, b, c, d, e, 67, w3);
	w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
	SHA512ROUND(e, f, g, h, a, b, c, d, 68, w4);
	w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
	SHA512ROUND(d, e, f, g, h, a, b, c, 69, w5);
	w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
	SHA512ROUND(c, d, e, f, g, h, a, b, 70, w6);
	w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
	SHA512ROUND(b, c, d, e, f, g, h, a, 71, w7);
	w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
	SHA512ROUND(a, b, c, d, e, f, g, h, 72, w8);
	w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
	SHA512ROUND(h, a, b, c, d, e, f, g, 73, w9);
	w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
	SHA512ROUND(g, h, a, b, c, d, e, f, 74, w10);
	w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
	SHA512ROUND(f, g, h, a, b, c, d, e, 75, w11);
	w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
	SHA512ROUND(e, f, g, h, a, b, c, d, 76, w12);
	w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
	SHA512ROUND(d, e, f, g, h, a, b, c, 77, w13);
	w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
	SHA512ROUND(c, d, e, f, g, h, a, b, 78, w14);
	w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
	SHA512ROUND(b, c, d, e, f, g, h, a, 79, w15);

	/* add the working variables back into the state */
	ctx->state.s64[0] += a;
	ctx->state.s64[1] += b;
	ctx->state.s64[2] += c;
	ctx->state.s64[3] += d;
	ctx->state.s64[4] += e;
	ctx->state.s64[5] += f;
	ctx->state.s64[6] += g;
	ctx->state.s64[7] += h;

}
604#endif	/* !__amd64 || !_KERNEL */
605
606
607/*
608 * Encode()
609 *
610 * purpose: to convert a list of numbers from little endian to big endian
611 *   input: uint8_t *	: place to store the converted big endian numbers
612 *	    uint32_t *	: place to get numbers to convert from
613 *          size_t	: the length of the input in bytes
614 *  output: void
615 */
616
617static void
618Encode(uint8_t *_RESTRICT_KYWD output, uint32_t *_RESTRICT_KYWD input,
619    size_t len)
620{
621	size_t		i, j;
622
623#if	defined(__sparc)
624	if (IS_P2ALIGNED(output, sizeof (uint32_t))) {
625		for (i = 0, j = 0; j < len; i++, j += 4) {
626			/* LINTED E_BAD_PTR_CAST_ALIGN */
627			*((uint32_t *)(output + j)) = input[i];
628		}
629	} else {
630#endif	/* little endian -- will work on big endian, but slowly */
631		for (i = 0, j = 0; j < len; i++, j += 4) {
632			output[j]	= (input[i] >> 24) & 0xff;
633			output[j + 1]	= (input[i] >> 16) & 0xff;
634			output[j + 2]	= (input[i] >>  8) & 0xff;
635			output[j + 3]	= input[i] & 0xff;
636		}
637#if	defined(__sparc)
638	}
639#endif
640}
641
642static void
643Encode64(uint8_t *_RESTRICT_KYWD output, uint64_t *_RESTRICT_KYWD input,
644    size_t len)
645{
646	size_t		i, j;
647
648#if	defined(__sparc)
649	if (IS_P2ALIGNED(output, sizeof (uint64_t))) {
650		for (i = 0, j = 0; j < len; i++, j += 8) {
651			/* LINTED E_BAD_PTR_CAST_ALIGN */
652			*((uint64_t *)(output + j)) = input[i];
653		}
654	} else {
655#endif	/* little endian -- will work on big endian, but slowly */
656		for (i = 0, j = 0; j < len; i++, j += 8) {
657
658			output[j]	= (input[i] >> 56) & 0xff;
659			output[j + 1]	= (input[i] >> 48) & 0xff;
660			output[j + 2]	= (input[i] >> 40) & 0xff;
661			output[j + 3]	= (input[i] >> 32) & 0xff;
662			output[j + 4]	= (input[i] >> 24) & 0xff;
663			output[j + 5]	= (input[i] >> 16) & 0xff;
664			output[j + 6]	= (input[i] >>  8) & 0xff;
665			output[j + 7]	= input[i] & 0xff;
666		}
667#if	defined(__sparc)
668	}
669#endif
670}
671
672
673void
674SHA2Init(uint64_t mech, SHA2_CTX *ctx)
675{
676
677	switch (mech) {
678	case SHA256_MECH_INFO_TYPE:
679	case SHA256_HMAC_MECH_INFO_TYPE:
680	case SHA256_HMAC_GEN_MECH_INFO_TYPE:
681		ctx->state.s32[0] = 0x6a09e667U;
682		ctx->state.s32[1] = 0xbb67ae85U;
683		ctx->state.s32[2] = 0x3c6ef372U;
684		ctx->state.s32[3] = 0xa54ff53aU;
685		ctx->state.s32[4] = 0x510e527fU;
686		ctx->state.s32[5] = 0x9b05688cU;
687		ctx->state.s32[6] = 0x1f83d9abU;
688		ctx->state.s32[7] = 0x5be0cd19U;
689		break;
690	case SHA384_MECH_INFO_TYPE:
691	case SHA384_HMAC_MECH_INFO_TYPE:
692	case SHA384_HMAC_GEN_MECH_INFO_TYPE:
693		ctx->state.s64[0] = 0xcbbb9d5dc1059ed8ULL;
694		ctx->state.s64[1] = 0x629a292a367cd507ULL;
695		ctx->state.s64[2] = 0x9159015a3070dd17ULL;
696		ctx->state.s64[3] = 0x152fecd8f70e5939ULL;
697		ctx->state.s64[4] = 0x67332667ffc00b31ULL;
698		ctx->state.s64[5] = 0x8eb44a8768581511ULL;
699		ctx->state.s64[6] = 0xdb0c2e0d64f98fa7ULL;
700		ctx->state.s64[7] = 0x47b5481dbefa4fa4ULL;
701		break;
702	case SHA512_MECH_INFO_TYPE:
703	case SHA512_HMAC_MECH_INFO_TYPE:
704	case SHA512_HMAC_GEN_MECH_INFO_TYPE:
705		ctx->state.s64[0] = 0x6a09e667f3bcc908ULL;
706		ctx->state.s64[1] = 0xbb67ae8584caa73bULL;
707		ctx->state.s64[2] = 0x3c6ef372fe94f82bULL;
708		ctx->state.s64[3] = 0xa54ff53a5f1d36f1ULL;
709		ctx->state.s64[4] = 0x510e527fade682d1ULL;
710		ctx->state.s64[5] = 0x9b05688c2b3e6c1fULL;
711		ctx->state.s64[6] = 0x1f83d9abfb41bd6bULL;
712		ctx->state.s64[7] = 0x5be0cd19137e2179ULL;
713		break;
714	case SHA512_224_MECH_INFO_TYPE:
715		ctx->state.s64[0] = 0x8C3D37C819544DA2ULL;
716		ctx->state.s64[1] = 0x73E1996689DCD4D6ULL;
717		ctx->state.s64[2] = 0x1DFAB7AE32FF9C82ULL;
718		ctx->state.s64[3] = 0x679DD514582F9FCFULL;
719		ctx->state.s64[4] = 0x0F6D2B697BD44DA8ULL;
720		ctx->state.s64[5] = 0x77E36F7304C48942ULL;
721		ctx->state.s64[6] = 0x3F9D85A86A1D36C8ULL;
722		ctx->state.s64[7] = 0x1112E6AD91D692A1ULL;
723		break;
724	case SHA512_256_MECH_INFO_TYPE:
725		ctx->state.s64[0] = 0x22312194FC2BF72CULL;
726		ctx->state.s64[1] = 0x9F555FA3C84C64C2ULL;
727		ctx->state.s64[2] = 0x2393B86B6F53B151ULL;
728		ctx->state.s64[3] = 0x963877195940EABDULL;
729		ctx->state.s64[4] = 0x96283EE2A88EFFE3ULL;
730		ctx->state.s64[5] = 0xBE5E1E2553863992ULL;
731		ctx->state.s64[6] = 0x2B0199FC2C85B8AAULL;
732		ctx->state.s64[7] = 0x0EB72DDC81C52CA2ULL;
733		break;
734#ifdef _KERNEL
735	default:
736		cmn_err(CE_PANIC,
737		    "sha2_init: failed to find a supported algorithm: 0x%x",
738		    (uint32_t)mech);
739
740#endif /* _KERNEL */
741	}
742
743	ctx->algotype = (uint32_t)mech;
744	ctx->count.c64[0] = ctx->count.c64[1] = 0;
745}
746
747#ifndef _KERNEL
748
749// #pragma inline(SHA256Init, SHA384Init, SHA512Init)
750void
751SHA256Init(SHA256_CTX *ctx)
752{
753	SHA2Init(SHA256, ctx);
754}
755
756void
757SHA384Init(SHA384_CTX *ctx)
758{
759	SHA2Init(SHA384, ctx);
760}
761
762void
763SHA512Init(SHA512_CTX *ctx)
764{
765	SHA2Init(SHA512, ctx);
766}
767
768#endif /* _KERNEL */
769
770/*
771 * SHA2Update()
772 *
773 * purpose: continues an sha2 digest operation, using the message block
774 *          to update the context.
775 *   input: SHA2_CTX *	: the context to update
776 *          void *	: the message block
777 *          size_t      : the length of the message block, in bytes
778 *  output: void
779 */
780
781void
782SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len)
783{
784	uint32_t	i, buf_index, buf_len, buf_limit;
785	const uint8_t	*input = inptr;
786	uint32_t	algotype = ctx->algotype;
787
788	/* check for noop */
789	if (input_len == 0)
790		return;
791
792	if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
793		buf_limit = 64;
794
795		/* compute number of bytes mod 64 */
796		buf_index = (ctx->count.c32[1] >> 3) & 0x3F;
797
798		/* update number of bits */
799		if ((ctx->count.c32[1] += (input_len << 3)) < (input_len << 3))
800			ctx->count.c32[0]++;
801
802		ctx->count.c32[0] += (input_len >> 29);
803
804	} else {
805		buf_limit = 128;
806
807		/* compute number of bytes mod 128 */
808		buf_index = (ctx->count.c64[1] >> 3) & 0x7F;
809
810		/* update number of bits */
811		if ((ctx->count.c64[1] += (input_len << 3)) < (input_len << 3))
812			ctx->count.c64[0]++;
813
814		ctx->count.c64[0] += (input_len >> 29);
815	}
816
817	buf_len = buf_limit - buf_index;
818
819	/* transform as many times as possible */
820	i = 0;
821	if (input_len >= buf_len) {
822
823		/*
824		 * general optimization:
825		 *
826		 * only do initial bcopy() and SHA2Transform() if
827		 * buf_index != 0.  if buf_index == 0, we're just
828		 * wasting our time doing the bcopy() since there
829		 * wasn't any data left over from a previous call to
830		 * SHA2Update().
831		 */
832		if (buf_index) {
833			bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
834			if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE)
835				SHA256Transform(ctx, ctx->buf_un.buf8);
836			else
837				SHA512Transform(ctx, ctx->buf_un.buf8);
838
839			i = buf_len;
840		}
841
842#if !defined(__amd64) || !defined(_KERNEL)
843		if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
844			for (; i + buf_limit - 1 < input_len; i += buf_limit) {
845				SHA256Transform(ctx, &input[i]);
846			}
847		} else {
848			for (; i + buf_limit - 1 < input_len; i += buf_limit) {
849				SHA512Transform(ctx, &input[i]);
850			}
851		}
852
853#else
854		uint32_t block_count;
855		if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
856			block_count = (input_len - i) >> 6;
857			if (block_count > 0) {
858				SHA256TransformBlocks(ctx, &input[i],
859				    block_count);
860				i += block_count << 6;
861			}
862		} else {
863			block_count = (input_len - i) >> 7;
864			if (block_count > 0) {
865				SHA512TransformBlocks(ctx, &input[i],
866				    block_count);
867				i += block_count << 7;
868			}
869		}
870#endif	/* !__amd64 || !_KERNEL */
871
872		/*
873		 * general optimization:
874		 *
875		 * if i and input_len are the same, return now instead
876		 * of calling bcopy(), since the bcopy() in this case
877		 * will be an expensive noop.
878		 */
879
880		if (input_len == i)
881			return;
882
883		buf_index = 0;
884	}
885
886	/* buffer remaining input */
887	bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
888}
889
890
891/*
892 * SHA2Final()
893 *
894 * purpose: ends an sha2 digest operation, finalizing the message digest and
895 *          zeroing the context.
896 *   input: uchar_t *	: a buffer to store the digest
897 *			: The function actually uses void* because many
898 *			: callers pass things other than uchar_t here.
899 *          SHA2_CTX *  : the context to finalize, save, and zero
900 *  output: void
901 */
902
903void
904SHA2Final(void *digest, SHA2_CTX *ctx)
905{
906	uint8_t		bitcount_be[sizeof (ctx->count.c32)];
907	uint8_t		bitcount_be64[sizeof (ctx->count.c64)];
908	uint32_t	index;
909	uint32_t	algotype = ctx->algotype;
910
911	if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
912		index  = (ctx->count.c32[1] >> 3) & 0x3f;
913		Encode(bitcount_be, ctx->count.c32, sizeof (bitcount_be));
914		SHA2Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);
915		SHA2Update(ctx, bitcount_be, sizeof (bitcount_be));
916		Encode(digest, ctx->state.s32, sizeof (ctx->state.s32));
917	} else {
918		index  = (ctx->count.c64[1] >> 3) & 0x7f;
919		Encode64(bitcount_be64, ctx->count.c64,
920		    sizeof (bitcount_be64));
921		SHA2Update(ctx, PADDING, ((index < 112) ? 112 : 240) - index);
922		SHA2Update(ctx, bitcount_be64, sizeof (bitcount_be64));
923		if (algotype <= SHA384_HMAC_GEN_MECH_INFO_TYPE) {
924			ctx->state.s64[6] = ctx->state.s64[7] = 0;
925			Encode64(digest, ctx->state.s64,
926			    sizeof (uint64_t) * 6);
927		} else if (algotype == SHA512_224_MECH_INFO_TYPE) {
928			uint8_t last[sizeof (uint64_t)];
929			/*
930			 * Since SHA-512/224 doesn't align well to 64-bit
931			 * boundaries, we must do the encoding in three steps:
932			 * 1) encode the three 64-bit words that fit neatly
933			 * 2) encode the last 64-bit word to a temp buffer
934			 * 3) chop out the lower 32-bits from the temp buffer
935			 *    and append them to the digest
936			 */
937			Encode64(digest, ctx->state.s64, sizeof (uint64_t) * 3);
938			Encode64(last, &ctx->state.s64[3], sizeof (uint64_t));
939			bcopy(last, (uint8_t *)digest + 24, 4);
940		} else if (algotype == SHA512_256_MECH_INFO_TYPE) {
941			Encode64(digest, ctx->state.s64, sizeof (uint64_t) * 4);
942		} else {
943			Encode64(digest, ctx->state.s64,
944			    sizeof (ctx->state.s64));
945		}
946	}
947
948	/* zeroize sensitive information */
949	bzero(ctx, sizeof (*ctx));
950}
951
952#ifdef _KERNEL
953EXPORT_SYMBOL(SHA2Init);
954EXPORT_SYMBOL(SHA2Update);
955EXPORT_SYMBOL(SHA2Final);
956#endif
957