rc4_enc.c revision 296465
1/* crypto/rc4/rc4_enc.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to.  The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 *    notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 *    notice, this list of conditions and the following disclaimer in the
30 *    documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 *    must display the following acknowledgement:
33 *    "This product includes cryptographic software written by
34 *     Eric Young (eay@cryptsoft.com)"
35 *    The word 'cryptographic' can be left out if the rouines from the library
36 *    being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 *    the apps directory (application code) you must include an acknowledgement:
39 *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed.  i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <openssl/rc4.h>
60#include "rc4_locl.h"
61
62/*-
63 * RC4 as implemented from a posting from
64 * Newsgroups: sci.crypt
65 * From: sterndark@netcom.com (David Sterndark)
66 * Subject: RC4 Algorithm revealed.
67 * Message-ID: <sternCvKL4B.Hyy@netcom.com>
68 * Date: Wed, 14 Sep 1994 06:35:31 GMT
69 */
70
71void RC4(RC4_KEY *key, unsigned long len, const unsigned char *indata,
72         unsigned char *outdata)
73{
74    register RC4_INT *d;
75    register RC4_INT x, y, tx, ty;
76    int i;
77
78    x = key->x;
79    y = key->y;
80    d = key->data;
81
82#if defined(RC4_CHUNK)
83    /*-
84     * The original reason for implementing this(*) was the fact that
85     * pre-21164a Alpha CPUs don't have byte load/store instructions
86     * and e.g. a byte store has to be done with 64-bit load, shift,
87     * and, or and finally 64-bit store. Peaking data and operating
88     * at natural word size made it possible to reduce amount of
89     * instructions as well as to perform early read-ahead without
90     * suffering from RAW (read-after-write) hazard. This resulted
91     * in ~40%(**) performance improvement on 21064 box with gcc.
92     * But it's not only Alpha users who win here:-) Thanks to the
93     * early-n-wide read-ahead this implementation also exhibits
94     * >40% speed-up on SPARC and 20-30% on 64-bit MIPS (depending
95     * on sizeof(RC4_INT)).
96     *
97     * (*)  "this" means code which recognizes the case when input
98     *      and output pointers appear to be aligned at natural CPU
99     *      word boundary
100     * (**) i.e. according to 'apps/openssl speed rc4' benchmark,
101     *      crypto/rc4/rc4speed.c exhibits almost 70% speed-up...
102     *
103     * Cavets.
104     *
105     * - RC4_CHUNK="unsigned long long" should be a #1 choice for
106     *   UltraSPARC. Unfortunately gcc generates very slow code
107     *   (2.5-3 times slower than one generated by Sun's WorkShop
108     *   C) and therefore gcc (at least 2.95 and earlier) should
109     *   always be told that RC4_CHUNK="unsigned long".
110     *
111     *                                      <appro@fy.chalmers.se>
112     */
113
114# define RC4_STEP       ( \
115                        x=(x+1) &0xff,  \
116                        tx=d[x],        \
117                        y=(tx+y)&0xff,  \
118                        ty=d[y],        \
119                        d[y]=tx,        \
120                        d[x]=ty,        \
121                        (RC4_CHUNK)d[(tx+ty)&0xff]\
122                        )
123
124    if ((((unsigned long)indata & (sizeof(RC4_CHUNK) - 1)) |
125         ((unsigned long)outdata & (sizeof(RC4_CHUNK) - 1))) == 0) {
126        RC4_CHUNK ichunk, otp;
127        const union {
128            long one;
129            char little;
130        } is_endian = {
131            1
132        };
133
134        /*-
135         * I reckon we can afford to implement both endian
136         * cases and to decide which way to take at run-time
137         * because the machine code appears to be very compact
138         * and redundant 1-2KB is perfectly tolerable (i.e.
139         * in case the compiler fails to eliminate it:-). By
140         * suggestion from Terrel Larson <terr@terralogic.net>
141         * who also stands for the is_endian union:-)
142         *
143         * Special notes.
144         *
145         * - is_endian is declared automatic as doing otherwise
146         *   (declaring static) prevents gcc from eliminating
147         *   the redundant code;
148         * - compilers (those I've tried) don't seem to have
149         *   problems eliminating either the operators guarded
150         *   by "if (sizeof(RC4_CHUNK)==8)" or the condition
151         *   expressions themselves so I've got 'em to replace
152         *   corresponding #ifdefs from the previous version;
153         * - I chose to let the redundant switch cases when
154         *   sizeof(RC4_CHUNK)!=8 be (were also #ifdefed
155         *   before);
156         * - in case you wonder "&(sizeof(RC4_CHUNK)*8-1)" in
157         *   [LB]ESHFT guards against "shift is out of range"
158         *   warnings when sizeof(RC4_CHUNK)!=8
159         *
160         *                      <appro@fy.chalmers.se>
161         */
162        if (!is_endian.little) { /* BIG-ENDIAN CASE */
163# define BESHFT(c)      (((sizeof(RC4_CHUNK)-(c)-1)*8)&(sizeof(RC4_CHUNK)*8-1))
164            for (; len & ~(sizeof(RC4_CHUNK) - 1); len -= sizeof(RC4_CHUNK)) {
165                ichunk = *(RC4_CHUNK *) indata;
166                otp = RC4_STEP << BESHFT(0);
167                otp |= RC4_STEP << BESHFT(1);
168                otp |= RC4_STEP << BESHFT(2);
169                otp |= RC4_STEP << BESHFT(3);
170                if (sizeof(RC4_CHUNK) == 8) {
171                    otp |= RC4_STEP << BESHFT(4);
172                    otp |= RC4_STEP << BESHFT(5);
173                    otp |= RC4_STEP << BESHFT(6);
174                    otp |= RC4_STEP << BESHFT(7);
175                }
176                *(RC4_CHUNK *) outdata = otp ^ ichunk;
177                indata += sizeof(RC4_CHUNK);
178                outdata += sizeof(RC4_CHUNK);
179            }
180            if (len) {
181                RC4_CHUNK mask = (RC4_CHUNK) - 1, ochunk;
182
183                ichunk = *(RC4_CHUNK *) indata;
184                ochunk = *(RC4_CHUNK *) outdata;
185                otp = 0;
186                i = BESHFT(0);
187                mask <<= (sizeof(RC4_CHUNK) - len) << 3;
188                switch (len & (sizeof(RC4_CHUNK) - 1)) {
189                case 7:
190                    otp = RC4_STEP << i, i -= 8;
191                case 6:
192                    otp |= RC4_STEP << i, i -= 8;
193                case 5:
194                    otp |= RC4_STEP << i, i -= 8;
195                case 4:
196                    otp |= RC4_STEP << i, i -= 8;
197                case 3:
198                    otp |= RC4_STEP << i, i -= 8;
199                case 2:
200                    otp |= RC4_STEP << i, i -= 8;
201                case 1:
202                    otp |= RC4_STEP << i, i -= 8;
203                case 0:;       /*
204                                 * it's never the case,
205                                 * but it has to be here
206                                 * for ultrix?
207                                 */
208                }
209                ochunk &= ~mask;
210                ochunk |= (otp ^ ichunk) & mask;
211                *(RC4_CHUNK *) outdata = ochunk;
212            }
213            key->x = x;
214            key->y = y;
215            return;
216        } else {                /* LITTLE-ENDIAN CASE */
217# define LESHFT(c)      (((c)*8)&(sizeof(RC4_CHUNK)*8-1))
218            for (; len & ~(sizeof(RC4_CHUNK) - 1); len -= sizeof(RC4_CHUNK)) {
219                ichunk = *(RC4_CHUNK *) indata;
220                otp = RC4_STEP;
221                otp |= RC4_STEP << 8;
222                otp |= RC4_STEP << 16;
223                otp |= RC4_STEP << 24;
224                if (sizeof(RC4_CHUNK) == 8) {
225                    otp |= RC4_STEP << LESHFT(4);
226                    otp |= RC4_STEP << LESHFT(5);
227                    otp |= RC4_STEP << LESHFT(6);
228                    otp |= RC4_STEP << LESHFT(7);
229                }
230                *(RC4_CHUNK *) outdata = otp ^ ichunk;
231                indata += sizeof(RC4_CHUNK);
232                outdata += sizeof(RC4_CHUNK);
233            }
234            if (len) {
235                RC4_CHUNK mask = (RC4_CHUNK) - 1, ochunk;
236
237                ichunk = *(RC4_CHUNK *) indata;
238                ochunk = *(RC4_CHUNK *) outdata;
239                otp = 0;
240                i = 0;
241                mask >>= (sizeof(RC4_CHUNK) - len) << 3;
242                switch (len & (sizeof(RC4_CHUNK) - 1)) {
243                case 7:
244                    otp = RC4_STEP, i += 8;
245                case 6:
246                    otp |= RC4_STEP << i, i += 8;
247                case 5:
248                    otp |= RC4_STEP << i, i += 8;
249                case 4:
250                    otp |= RC4_STEP << i, i += 8;
251                case 3:
252                    otp |= RC4_STEP << i, i += 8;
253                case 2:
254                    otp |= RC4_STEP << i, i += 8;
255                case 1:
256                    otp |= RC4_STEP << i, i += 8;
257                case 0:;       /*
258                                 * it's never the case,
259                                 * but it has to be here
260                                 * for ultrix?
261                                 */
262                }
263                ochunk &= ~mask;
264                ochunk |= (otp ^ ichunk) & mask;
265                *(RC4_CHUNK *) outdata = ochunk;
266            }
267            key->x = x;
268            key->y = y;
269            return;
270        }
271    }
272#endif
273#define LOOP(in,out) \
274                x=((x+1)&0xff); \
275                tx=d[x]; \
276                y=(tx+y)&0xff; \
277                d[x]=ty=d[y]; \
278                d[y]=tx; \
279                (out) = d[(tx+ty)&0xff]^ (in);
280
281#ifndef RC4_INDEX
282# define RC4_LOOP(a,b,i) LOOP(*((a)++),*((b)++))
283#else
284# define RC4_LOOP(a,b,i) LOOP(a[i],b[i])
285#endif
286
287    i = (int)(len >> 3L);
288    if (i) {
289        for (;;) {
290            RC4_LOOP(indata, outdata, 0);
291            RC4_LOOP(indata, outdata, 1);
292            RC4_LOOP(indata, outdata, 2);
293            RC4_LOOP(indata, outdata, 3);
294            RC4_LOOP(indata, outdata, 4);
295            RC4_LOOP(indata, outdata, 5);
296            RC4_LOOP(indata, outdata, 6);
297            RC4_LOOP(indata, outdata, 7);
298#ifdef RC4_INDEX
299            indata += 8;
300            outdata += 8;
301#endif
302            if (--i == 0)
303                break;
304        }
305    }
306    i = (int)len & 0x07;
307    if (i) {
308        for (;;) {
309            RC4_LOOP(indata, outdata, 0);
310            if (--i == 0)
311                break;
312            RC4_LOOP(indata, outdata, 1);
313            if (--i == 0)
314                break;
315            RC4_LOOP(indata, outdata, 2);
316            if (--i == 0)
317                break;
318            RC4_LOOP(indata, outdata, 3);
319            if (--i == 0)
320                break;
321            RC4_LOOP(indata, outdata, 4);
322            if (--i == 0)
323                break;
324            RC4_LOOP(indata, outdata, 5);
325            if (--i == 0)
326                break;
327            RC4_LOOP(indata, outdata, 6);
328            if (--i == 0)
329                break;
330        }
331    }
332    key->x = x;
333    key->y = y;
334}
335