1/* Copyright (c) 1998,2004-2005 Apple Computer, Inc.  All Rights Reserved.
2 *
3 * NOTICE: USE OF THE MATERIALS ACCOMPANYING THIS NOTICE IS SUBJECT
4 * TO THE TERMS OF THE SIGNED "FAST ELLIPTIC ENCRYPTION (FEE) REFERENCE
5 * SOURCE CODE EVALUATION AGREEMENT" BETWEEN APPLE COMPUTER, INC. AND THE
6 * ORIGINAL LICENSEE THAT OBTAINED THESE MATERIALS FROM APPLE COMPUTER,
7 * INC.  ANY USE OF THESE MATERIALS NOT PERMITTED BY SUCH AGREEMENT WILL
8 * EXPOSE YOU TO LIABILITY.
9 ***************************************************************************
10 *
11 * SHA1_priv.c - low-level SHA-1 hash algorithm.
12 *
13 * Revision History
14 * ----------------
15 * 05 Jan 1998	Doug Mitchell at Apple
16 *	Created, based on source by Peter C. Gutmann.
17 *	Mods: made reentrant, added NIST fix to expand(), eliminated
18 *	unnecessary copy to local W[] array.
19 */
20
21
22/* NIST proposed Secure Hash Standard.
23
24   Written 2 September 1992, Peter C. Gutmann.
25   This implementation placed in the public domain.
26
27   Comments to pgut1@cs.aukuni.ac.nz */
28
29
30#include "SHA1_priv.h"
31#include <string.h>
32
33/* The SHS f()-functions */
34
35#define f1(x,y,z)   ( ( x & y ) | ( ~x & z ) )              /* Rounds  0-19 */
36#define f2(x,y,z)   ( x ^ y ^ z )                           /* Rounds 20-39 */
37#define f3(x,y,z)   ( ( x & y ) | ( x & z ) | ( y & z ) )   /* Rounds 40-59 */
38#define f4(x,y,z)   ( x ^ y ^ z )                           /* Rounds 60-79 */
39
40/* The SHS Mysterious Constants */
41
42#define K1  0x5A827999L     /* Rounds  0-19 */
43#define K2  0x6ED9EBA1L     /* Rounds 20-39 */
44#define K3  0x8F1BBCDCL     /* Rounds 40-59 */
45#define K4  0xCA62C1D6L     /* Rounds 60-79 */
46
47/* SHS initial values */
48
49#define h0init  0x67452301L
50#define h1init  0xEFCDAB89L
51#define h2init  0x98BADCFEL
52#define h3init  0x10325476L
53#define h4init  0xC3D2E1F0L
54
55/* 32-bit rotate - kludged with shifts */
56
57#define S(n,X)  ( ( X << n ) | ( X >> ( 32 - n ) ) )
58
59/* The initial expanding function */
60
61/*
62 * 06 Jan 1998. Added left circular shift per NIST FIPS-180-1 (at
63 * http://www.nist.gov/itl/div897/pubs/fip180-1.htm). Also see
64 * B. Schneier, Applied Cryptography, Second Edition, section 18.7
65 * for info on this addenda to the original NIST spec.
66 */
67#define expand(count) {	\
68    W[count] = W[count - 3] ^ W[count - 8] ^ W[count - 14] ^ W[count - 16]; \
69    W[count] = S(1, W[count]); \
70}
71
72/* The four SHS sub-rounds */
73
74#define subRound1(count)    \
75    { \
76    temp = S( 5, A ) + f1( B, C, D ) + E + W[ count ] + K1; \
77    E = D; \
78    D = C; \
79    C = S( 30, B ); \
80    B = A; \
81    A = temp; \
82    }
83
84#define subRound2(count)    \
85    { \
86    temp = S( 5, A ) + f2( B, C, D ) + E + W[ count ] + K2; \
87    E = D; \
88    D = C; \
89    C = S( 30, B ); \
90    B = A; \
91    A = temp; \
92    }
93
94#define subRound3(count)    \
95    { \
96    temp = S( 5, A ) + f3( B, C, D ) + E + W[ count ] + K3; \
97    E = D; \
98    D = C; \
99    C = S( 30, B ); \
100    B = A; \
101    A = temp; \
102    }
103
104#define subRound4(count)    \
105    { \
106    temp = S( 5, A ) + f4( B, C, D ) + E + W[ count ] + K4; \
107    E = D; \
108    D = C; \
109    C = S( 30, B ); \
110    B = A; \
111    A = temp; \
112    }
113
114/* Initialize the SHS values */
115
116void shsInit( SHS_INFO *shsInfo )
117    {
118    /* Set the h-vars to their initial values */
119    shsInfo->digest[ 0 ] = h0init;
120    shsInfo->digest[ 1 ] = h1init;
121    shsInfo->digest[ 2 ] = h2init;
122    shsInfo->digest[ 3 ] = h3init;
123    shsInfo->digest[ 4 ] = h4init;
124
125    /* Initialise bit count */
126    shsInfo->countLo = shsInfo->countHi = 0L;
127    }
128
129/* Perform the SHS transformation.  Note that this code, like MD5, seems to
130   break some optimizing compilers - it may be necessary to split it into
131   sections, eg based on the four subrounds */
132
133static void shsTransform( SHS_INFO *shsInfo )
134{
135    LONG *W, temp;
136    LONG A, B, C, D, E;
137
138    /* Step A.  Copy the data buffer into the local work buffer. */
139    /* 07 Jan 1998, dmitch: skip this bogus move, and let the caller
140     * copy data directly into the W[] array. To minimize changes,
141     * we'll just increase the size of shsInfo->data[] and make W
142     * a pointer here.
143     */
144    W = shsInfo->data;
145
146    /* Step B.  Expand the 16 words into 64 temporary data words */
147
148    /*
149     * Note: I tried optimizing this via a for loop, and for some reason,
150     * the "optimized" version ran slower on PPC than the original
151     * unrolled version. The optimized version does run faster on i486 than
152     * the unrolled version.
153     *
154     * Similarly, the set of subRounds, below, runs slower on i486 when
155     * optimized via 4 'for' loops. The "optimized" version of that is
156     * a wash on PPC.
157     *
158     * Conclusion: leave both of 'em unrolled. We could ifdef per machine,
159     * but this would get messy once we had more than two architectures.
160     * We may want to revisit this.  --dpm
161     */
162    expand( 16 ); expand( 17 ); expand( 18 ); expand( 19 ); expand( 20 );
163    expand( 21 ); expand( 22 ); expand( 23 ); expand( 24 ); expand( 25 );
164    expand( 26 ); expand( 27 ); expand( 28 ); expand( 29 ); expand( 30 );
165    expand( 31 ); expand( 32 ); expand( 33 ); expand( 34 ); expand( 35 );
166    expand( 36 ); expand( 37 ); expand( 38 ); expand( 39 ); expand( 40 );
167    expand( 41 ); expand( 42 ); expand( 43 ); expand( 44 ); expand( 45 );
168    expand( 46 ); expand( 47 ); expand( 48 ); expand( 49 ); expand( 50 );
169    expand( 51 ); expand( 52 ); expand( 53 ); expand( 54 ); expand( 55 );
170    expand( 56 ); expand( 57 ); expand( 58 ); expand( 59 ); expand( 60 );
171    expand( 61 ); expand( 62 ); expand( 63 ); expand( 64 ); expand( 65 );
172    expand( 66 ); expand( 67 ); expand( 68 ); expand( 69 ); expand( 70 );
173    expand( 71 ); expand( 72 ); expand( 73 ); expand( 74 ); expand( 75 );
174    expand( 76 ); expand( 77 ); expand( 78 ); expand( 79 );
175
176    /* Step C.  Set up first buffer */
177    A = shsInfo->digest[ 0 ];
178    B = shsInfo->digest[ 1 ];
179    C = shsInfo->digest[ 2 ];
180    D = shsInfo->digest[ 3 ];
181    E = shsInfo->digest[ 4 ];
182
183    /* Step D.  Serious mangling, divided into four sub-rounds */
184    subRound1( 0 ); subRound1( 1 ); subRound1( 2 ); subRound1( 3 );
185    subRound1( 4 ); subRound1( 5 ); subRound1( 6 ); subRound1( 7 );
186    subRound1( 8 ); subRound1( 9 ); subRound1( 10 ); subRound1( 11 );
187    subRound1( 12 ); subRound1( 13 ); subRound1( 14 ); subRound1( 15 );
188    subRound1( 16 ); subRound1( 17 ); subRound1( 18 ); subRound1( 19 );
189    subRound2( 20 ); subRound2( 21 ); subRound2( 22 ); subRound2( 23 );
190    subRound2( 24 ); subRound2( 25 ); subRound2( 26 ); subRound2( 27 );
191    subRound2( 28 ); subRound2( 29 ); subRound2( 30 ); subRound2( 31 );
192    subRound2( 32 ); subRound2( 33 ); subRound2( 34 ); subRound2( 35 );
193    subRound2( 36 ); subRound2( 37 ); subRound2( 38 ); subRound2( 39 );
194    subRound3( 40 ); subRound3( 41 ); subRound3( 42 ); subRound3( 43 );
195    subRound3( 44 ); subRound3( 45 ); subRound3( 46 ); subRound3( 47 );
196    subRound3( 48 ); subRound3( 49 ); subRound3( 50 ); subRound3( 51 );
197    subRound3( 52 ); subRound3( 53 ); subRound3( 54 ); subRound3( 55 );
198    subRound3( 56 ); subRound3( 57 ); subRound3( 58 ); subRound3( 59 );
199    subRound4( 60 ); subRound4( 61 ); subRound4( 62 ); subRound4( 63 );
200    subRound4( 64 ); subRound4( 65 ); subRound4( 66 ); subRound4( 67 );
201    subRound4( 68 ); subRound4( 69 ); subRound4( 70 ); subRound4( 71 );
202    subRound4( 72 ); subRound4( 73 ); subRound4( 74 ); subRound4( 75 );
203    subRound4( 76 ); subRound4( 77 ); subRound4( 78 ); subRound4( 79 );
204
205    /* Step E.  Build message digest */
206    shsInfo->digest[ 0 ] += A;
207    shsInfo->digest[ 1 ] += B;
208    shsInfo->digest[ 2 ] += C;
209    shsInfo->digest[ 3 ] += D;
210    shsInfo->digest[ 4 ] += E;
211}
212
213#ifdef __LITTLE_ENDIAN__
214
215/* When run on a little-endian CPU we need to perform byte reversal on an
216   array of longwords.  It is possible to make the code endianness-
217   independant by fiddling around with data at the byte level, but this
218   makes for very slow code, so we rely on the user to sort out endianness
219   at compile time */
220
221static void byteReverse( LONG *buffer, int byteCount )
222
223    {
224    LONG value;
225    int count;
226
227    byteCount /= sizeof( LONG );
228    for( count = 0; count < byteCount; count++ )
229	{
230	value = ( buffer[ count ] << 16 ) | ( buffer[ count ] >> 16 );
231	buffer[ count ] = ( ( value & 0xFF00FF00L ) >> 8 ) | ( ( value & 0x00FF00FFL ) << 8 );
232	}
233    }
234
235#else	/* __LITTLE_ENDIAN__ */
236
237/*
238 * Nop for big-endian machines
239 */
240#define byteReverse( buffer, byteCount )
241
242#endif /* __LITTLE_ENDIAN__ */
243
244
245/* Update SHS for a block of data.  This code assumes that the buffer size
246   is a multiple of SHS_BLOCKSIZE bytes long, which makes the code a lot
247   more efficient since it does away with the need to handle partial blocks
248   between calls to shsUpdate() */
249
250void shsUpdate(
251  SHS_INFO *shsInfo,
252  const BYTE *buffer,
253  int count)
254
255    {
256    /* Update bitcount */
257    if( ( shsInfo->countLo + ( ( LONG ) count << 3 ) ) < shsInfo->countLo )
258	shsInfo->countHi++; /* Carry from low to high bitCount */
259    shsInfo->countLo += ( ( LONG ) count << 3 );
260    shsInfo->countHi += ( ( LONG ) count >> 29 );
261
262    /* Process data in SHS_BLOCKSIZE chunks */
263    while( count >= SHS_BLOCKSIZE )
264	{
265	memcpy( shsInfo->data, buffer, SHS_BLOCKSIZE );
266	byteReverse( shsInfo->data, SHS_BLOCKSIZE );
267	shsTransform( shsInfo );
268	buffer += SHS_BLOCKSIZE;
269	count -= SHS_BLOCKSIZE;
270	}
271
272    /* Handle any remaining bytes of data.  This should only happen once
273       on the final lot of data */
274    memcpy( shsInfo->data, buffer, count );
275    }
276
277void shsFinal(SHS_INFO *shsInfo)
278    {
279    int count;
280    LONG lowBitcount = shsInfo->countLo, highBitcount = shsInfo->countHi;
281
282    /* Compute number of bytes mod 64 */
283    count = ( int ) ( ( shsInfo->countLo >> 3 ) & 0x3F );
284
285    /* Set the first char of padding to 0x80.  This is safe since there is
286       always at least one byte free */
287    ( ( BYTE * ) shsInfo->data )[ count++ ] = 0x80;
288
289    /* Pad out to 56 mod 64 */
290    if( count > 56 )
291	{
292	/* Two lots of padding:  Pad the first block to 64 bytes */
293	memset( ( BYTE * ) &shsInfo->data + count, 0, 64 - count );
294	byteReverse( shsInfo->data, SHS_BLOCKSIZE );
295	shsTransform( shsInfo );
296
297	/* Now fill the next block with 56 bytes */
298	memset( &shsInfo->data, 0, 56 );
299	}
300    else
301	/* Pad block to 56 bytes */
302	memset( ( BYTE * ) &shsInfo->data + count, 0, 56 - count );
303	byteReverse( shsInfo->data, SHS_BLOCKSIZE );
304
305    /* Append length in bits and transform */
306    shsInfo->data[ 14 ] = highBitcount;
307    shsInfo->data[ 15 ] = lowBitcount;
308
309    shsTransform( shsInfo );
310    byteReverse( shsInfo->digest, SHS_DIGESTSIZE );
311    }
312