155714Skris/* crypto/rc4/rc4_enc.c */
255714Skris/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
355714Skris * All rights reserved.
455714Skris *
555714Skris * This package is an SSL implementation written
655714Skris * by Eric Young (eay@cryptsoft.com).
755714Skris * The implementation was written so as to conform with Netscapes SSL.
8280304Sjkim *
955714Skris * This library is free for commercial and non-commercial use as long as
1055714Skris * the following conditions are aheared to.  The following conditions
1155714Skris * apply to all code found in this distribution, be it the RC4, RSA,
1255714Skris * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
1355714Skris * included with this distribution is covered by the same copyright terms
1455714Skris * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15280304Sjkim *
1655714Skris * Copyright remains Eric Young's, and as such any Copyright notices in
1755714Skris * the code are not to be removed.
1855714Skris * If this package is used in a product, Eric Young should be given attribution
1955714Skris * as the author of the parts of the library used.
2055714Skris * This can be in the form of a textual message at program startup or
2155714Skris * in documentation (online or textual) provided with the package.
22280304Sjkim *
2355714Skris * Redistribution and use in source and binary forms, with or without
2455714Skris * modification, are permitted provided that the following conditions
2555714Skris * are met:
2655714Skris * 1. Redistributions of source code must retain the copyright
2755714Skris *    notice, this list of conditions and the following disclaimer.
2855714Skris * 2. Redistributions in binary form must reproduce the above copyright
2955714Skris *    notice, this list of conditions and the following disclaimer in the
3055714Skris *    documentation and/or other materials provided with the distribution.
3155714Skris * 3. All advertising materials mentioning features or use of this software
3255714Skris *    must display the following acknowledgement:
3355714Skris *    "This product includes cryptographic software written by
3455714Skris *     Eric Young (eay@cryptsoft.com)"
3555714Skris *    The word 'cryptographic' can be left out if the rouines from the library
3655714Skris *    being used are not cryptographic related :-).
37280304Sjkim * 4. If you include any Windows specific code (or a derivative thereof) from
3855714Skris *    the apps directory (application code) you must include an acknowledgement:
3955714Skris *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40280304Sjkim *
4155714Skris * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
4255714Skris * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
4355714Skris * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
4455714Skris * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
4555714Skris * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
4655714Skris * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
4755714Skris * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
4855714Skris * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
4955714Skris * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
5055714Skris * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
5155714Skris * SUCH DAMAGE.
52280304Sjkim *
5355714Skris * The licence and distribution terms for any publically available version or
5455714Skris * derivative of this code cannot be changed.  i.e. this code cannot simply be
5555714Skris * copied and put under another distribution licence
5655714Skris * [including the GNU Public Licence.]
5755714Skris */
5855714Skris
5955714Skris#include <openssl/rc4.h>
6055714Skris#include "rc4_locl.h"
6155714Skris
62280304Sjkim/*-
63280304Sjkim * RC4 as implemented from a posting from
6455714Skris * Newsgroups: sci.crypt
6555714Skris * From: sterndark@netcom.com (David Sterndark)
6655714Skris * Subject: RC4 Algorithm revealed.
6755714Skris * Message-ID: <sternCvKL4B.Hyy@netcom.com>
6855714Skris * Date: Wed, 14 Sep 1994 06:35:31 GMT
6955714Skris */
7055714Skris
71238405Sjkimvoid RC4(RC4_KEY *key, size_t len, const unsigned char *indata,
72280304Sjkim         unsigned char *outdata)
73280304Sjkim{
74280304Sjkim    register RC4_INT *d;
75280304Sjkim    register RC4_INT x, y, tx, ty;
76280304Sjkim    size_t i;
7755714Skris
78280304Sjkim    x = key->x;
79280304Sjkim    y = key->y;
80280304Sjkim    d = key->data;
81280304Sjkim
8259191Skris#if defined(RC4_CHUNK)
83280304Sjkim    /*-
84280304Sjkim     * The original reason for implementing this(*) was the fact that
85280304Sjkim     * pre-21164a Alpha CPUs don't have byte load/store instructions
86280304Sjkim     * and e.g. a byte store has to be done with 64-bit load, shift,
87280304Sjkim     * and, or and finally 64-bit store. Peaking data and operating
88280304Sjkim     * at natural word size made it possible to reduce amount of
89280304Sjkim     * instructions as well as to perform early read-ahead without
90280304Sjkim     * suffering from RAW (read-after-write) hazard. This resulted
91280304Sjkim     * in ~40%(**) performance improvement on 21064 box with gcc.
92280304Sjkim     * But it's not only Alpha users who win here:-) Thanks to the
93280304Sjkim     * early-n-wide read-ahead this implementation also exhibits
94280304Sjkim     * >40% speed-up on SPARC and 20-30% on 64-bit MIPS (depending
95280304Sjkim     * on sizeof(RC4_INT)).
96280304Sjkim     *
97280304Sjkim     * (*)  "this" means code which recognizes the case when input
98280304Sjkim     *      and output pointers appear to be aligned at natural CPU
99280304Sjkim     *      word boundary
100280304Sjkim     * (**) i.e. according to 'apps/openssl speed rc4' benchmark,
101280304Sjkim     *      crypto/rc4/rc4speed.c exhibits almost 70% speed-up...
102280304Sjkim     *
103280304Sjkim     * Cavets.
104280304Sjkim     *
105280304Sjkim     * - RC4_CHUNK="unsigned long long" should be a #1 choice for
106280304Sjkim     *   UltraSPARC. Unfortunately gcc generates very slow code
107280304Sjkim     *   (2.5-3 times slower than one generated by Sun's WorkShop
108280304Sjkim     *   C) and therefore gcc (at least 2.95 and earlier) should
109280304Sjkim     *   always be told that RC4_CHUNK="unsigned long".
110280304Sjkim     *
111280304Sjkim     *                                      <appro@fy.chalmers.se>
112280304Sjkim     */
11359191Skris
114280304Sjkim# define RC4_STEP       ( \
115280304Sjkim                        x=(x+1) &0xff,  \
116280304Sjkim                        tx=d[x],        \
117280304Sjkim                        y=(tx+y)&0xff,  \
118280304Sjkim                        ty=d[y],        \
119280304Sjkim                        d[y]=tx,        \
120280304Sjkim                        d[x]=ty,        \
121280304Sjkim                        (RC4_CHUNK)d[(tx+ty)&0xff]\
122280304Sjkim                        )
12359191Skris
124280304Sjkim    if ((((size_t)indata & (sizeof(RC4_CHUNK) - 1)) |
125280304Sjkim         ((size_t)outdata & (sizeof(RC4_CHUNK) - 1))) == 0) {
126280304Sjkim        RC4_CHUNK ichunk, otp;
127280304Sjkim        const union {
128280304Sjkim            long one;
129280304Sjkim            char little;
130280304Sjkim        } is_endian = {
131280304Sjkim            1
132280304Sjkim        };
13359191Skris
134280304Sjkim        /*-
135280304Sjkim         * I reckon we can afford to implement both endian
136280304Sjkim         * cases and to decide which way to take at run-time
137280304Sjkim         * because the machine code appears to be very compact
138280304Sjkim         * and redundant 1-2KB is perfectly tolerable (i.e.
139280304Sjkim         * in case the compiler fails to eliminate it:-). By
140280304Sjkim         * suggestion from Terrel Larson <terr@terralogic.net>
141280304Sjkim         * who also stands for the is_endian union:-)
142280304Sjkim         *
143280304Sjkim         * Special notes.
144280304Sjkim         *
145280304Sjkim         * - is_endian is declared automatic as doing otherwise
146280304Sjkim         *   (declaring static) prevents gcc from eliminating
147280304Sjkim         *   the redundant code;
148280304Sjkim         * - compilers (those I've tried) don't seem to have
149280304Sjkim         *   problems eliminating either the operators guarded
150280304Sjkim         *   by "if (sizeof(RC4_CHUNK)==8)" or the condition
151280304Sjkim         *   expressions themselves so I've got 'em to replace
152280304Sjkim         *   corresponding #ifdefs from the previous version;
153280304Sjkim         * - I chose to let the redundant switch cases when
154280304Sjkim         *   sizeof(RC4_CHUNK)!=8 be (were also #ifdefed
155280304Sjkim         *   before);
156280304Sjkim         * - in case you wonder "&(sizeof(RC4_CHUNK)*8-1)" in
157280304Sjkim         *   [LB]ESHFT guards against "shift is out of range"
158280304Sjkim         *   warnings when sizeof(RC4_CHUNK)!=8
159280304Sjkim         *
160280304Sjkim         *                      <appro@fy.chalmers.se>
161280304Sjkim         */
162280304Sjkim        if (!is_endian.little) { /* BIG-ENDIAN CASE */
163280304Sjkim# define BESHFT(c)      (((sizeof(RC4_CHUNK)-(c)-1)*8)&(sizeof(RC4_CHUNK)*8-1))
164280304Sjkim            for (; len & (0 - sizeof(RC4_CHUNK)); len -= sizeof(RC4_CHUNK)) {
165280304Sjkim                ichunk = *(RC4_CHUNK *) indata;
166280304Sjkim                otp = RC4_STEP << BESHFT(0);
167280304Sjkim                otp |= RC4_STEP << BESHFT(1);
168280304Sjkim                otp |= RC4_STEP << BESHFT(2);
169280304Sjkim                otp |= RC4_STEP << BESHFT(3);
170280304Sjkim                if (sizeof(RC4_CHUNK) == 8) {
171280304Sjkim                    otp |= RC4_STEP << BESHFT(4);
172280304Sjkim                    otp |= RC4_STEP << BESHFT(5);
173280304Sjkim                    otp |= RC4_STEP << BESHFT(6);
174280304Sjkim                    otp |= RC4_STEP << BESHFT(7);
175280304Sjkim                }
176280304Sjkim                *(RC4_CHUNK *) outdata = otp ^ ichunk;
177280304Sjkim                indata += sizeof(RC4_CHUNK);
178280304Sjkim                outdata += sizeof(RC4_CHUNK);
179280304Sjkim            }
180280304Sjkim            if (len) {
181280304Sjkim                RC4_CHUNK mask = (RC4_CHUNK) - 1, ochunk;
18259191Skris
183280304Sjkim                ichunk = *(RC4_CHUNK *) indata;
184280304Sjkim                ochunk = *(RC4_CHUNK *) outdata;
185280304Sjkim                otp = 0;
186280304Sjkim                i = BESHFT(0);
187280304Sjkim                mask <<= (sizeof(RC4_CHUNK) - len) << 3;
188280304Sjkim                switch (len & (sizeof(RC4_CHUNK) - 1)) {
189280304Sjkim                case 7:
190280304Sjkim                    otp = RC4_STEP << i, i -= 8;
191280304Sjkim                case 6:
192280304Sjkim                    otp |= RC4_STEP << i, i -= 8;
193280304Sjkim                case 5:
194280304Sjkim                    otp |= RC4_STEP << i, i -= 8;
195280304Sjkim                case 4:
196280304Sjkim                    otp |= RC4_STEP << i, i -= 8;
197280304Sjkim                case 3:
198280304Sjkim                    otp |= RC4_STEP << i, i -= 8;
199280304Sjkim                case 2:
200280304Sjkim                    otp |= RC4_STEP << i, i -= 8;
201280304Sjkim                case 1:
202280304Sjkim                    otp |= RC4_STEP << i, i -= 8;
203280304Sjkim                case 0:;       /*
204280304Sjkim                                 * it's never the case,
205280304Sjkim                                 * but it has to be here
206280304Sjkim                                 * for ultrix?
207280304Sjkim                                 */
208280304Sjkim                }
209280304Sjkim                ochunk &= ~mask;
210280304Sjkim                ochunk |= (otp ^ ichunk) & mask;
211280304Sjkim                *(RC4_CHUNK *) outdata = ochunk;
212280304Sjkim            }
213280304Sjkim            key->x = x;
214280304Sjkim            key->y = y;
215280304Sjkim            return;
216280304Sjkim        } else {                /* LITTLE-ENDIAN CASE */
217280304Sjkim# define LESHFT(c)      (((c)*8)&(sizeof(RC4_CHUNK)*8-1))
218280304Sjkim            for (; len & (0 - sizeof(RC4_CHUNK)); len -= sizeof(RC4_CHUNK)) {
219280304Sjkim                ichunk = *(RC4_CHUNK *) indata;
220280304Sjkim                otp = RC4_STEP;
221280304Sjkim                otp |= RC4_STEP << 8;
222280304Sjkim                otp |= RC4_STEP << 16;
223280304Sjkim                otp |= RC4_STEP << 24;
224280304Sjkim                if (sizeof(RC4_CHUNK) == 8) {
225280304Sjkim                    otp |= RC4_STEP << LESHFT(4);
226280304Sjkim                    otp |= RC4_STEP << LESHFT(5);
227280304Sjkim                    otp |= RC4_STEP << LESHFT(6);
228280304Sjkim                    otp |= RC4_STEP << LESHFT(7);
229280304Sjkim                }
230280304Sjkim                *(RC4_CHUNK *) outdata = otp ^ ichunk;
231280304Sjkim                indata += sizeof(RC4_CHUNK);
232280304Sjkim                outdata += sizeof(RC4_CHUNK);
233280304Sjkim            }
234280304Sjkim            if (len) {
235280304Sjkim                RC4_CHUNK mask = (RC4_CHUNK) - 1, ochunk;
23659191Skris
237280304Sjkim                ichunk = *(RC4_CHUNK *) indata;
238280304Sjkim                ochunk = *(RC4_CHUNK *) outdata;
239280304Sjkim                otp = 0;
240280304Sjkim                i = 0;
241280304Sjkim                mask >>= (sizeof(RC4_CHUNK) - len) << 3;
242280304Sjkim                switch (len & (sizeof(RC4_CHUNK) - 1)) {
243280304Sjkim                case 7:
244280304Sjkim                    otp = RC4_STEP, i += 8;
245280304Sjkim                case 6:
246280304Sjkim                    otp |= RC4_STEP << i, i += 8;
247280304Sjkim                case 5:
248280304Sjkim                    otp |= RC4_STEP << i, i += 8;
249280304Sjkim                case 4:
250280304Sjkim                    otp |= RC4_STEP << i, i += 8;
251280304Sjkim                case 3:
252280304Sjkim                    otp |= RC4_STEP << i, i += 8;
253280304Sjkim                case 2:
254280304Sjkim                    otp |= RC4_STEP << i, i += 8;
255280304Sjkim                case 1:
256280304Sjkim                    otp |= RC4_STEP << i, i += 8;
257280304Sjkim                case 0:;       /*
258280304Sjkim                                 * it's never the case,
259280304Sjkim                                 * but it has to be here
260280304Sjkim                                 * for ultrix?
261280304Sjkim                                 */
262280304Sjkim                }
263280304Sjkim                ochunk &= ~mask;
264280304Sjkim                ochunk |= (otp ^ ichunk) & mask;
265280304Sjkim                *(RC4_CHUNK *) outdata = ochunk;
266280304Sjkim            }
267280304Sjkim            key->x = x;
268280304Sjkim            key->y = y;
269280304Sjkim            return;
270280304Sjkim        }
271280304Sjkim    }
27259191Skris#endif
27355714Skris#define LOOP(in,out) \
274280304Sjkim                x=((x+1)&0xff); \
275280304Sjkim                tx=d[x]; \
276280304Sjkim                y=(tx+y)&0xff; \
277280304Sjkim                d[x]=ty=d[y]; \
278280304Sjkim                d[y]=tx; \
279280304Sjkim                (out) = d[(tx+ty)&0xff]^ (in);
28055714Skris
28155714Skris#ifndef RC4_INDEX
282280304Sjkim# define RC4_LOOP(a,b,i) LOOP(*((a)++),*((b)++))
28355714Skris#else
284280304Sjkim# define RC4_LOOP(a,b,i) LOOP(a[i],b[i])
28555714Skris#endif
28655714Skris
287280304Sjkim    i = len >> 3;
288280304Sjkim    if (i) {
289280304Sjkim        for (;;) {
290280304Sjkim            RC4_LOOP(indata, outdata, 0);
291280304Sjkim            RC4_LOOP(indata, outdata, 1);
292280304Sjkim            RC4_LOOP(indata, outdata, 2);
293280304Sjkim            RC4_LOOP(indata, outdata, 3);
294280304Sjkim            RC4_LOOP(indata, outdata, 4);
295280304Sjkim            RC4_LOOP(indata, outdata, 5);
296280304Sjkim            RC4_LOOP(indata, outdata, 6);
297280304Sjkim            RC4_LOOP(indata, outdata, 7);
29855714Skris#ifdef RC4_INDEX
299280304Sjkim            indata += 8;
300280304Sjkim            outdata += 8;
30155714Skris#endif
302280304Sjkim            if (--i == 0)
303280304Sjkim                break;
304280304Sjkim        }
305280304Sjkim    }
306280304Sjkim    i = len & 0x07;
307280304Sjkim    if (i) {
308280304Sjkim        for (;;) {
309280304Sjkim            RC4_LOOP(indata, outdata, 0);
310280304Sjkim            if (--i == 0)
311280304Sjkim                break;
312280304Sjkim            RC4_LOOP(indata, outdata, 1);
313280304Sjkim            if (--i == 0)
314280304Sjkim                break;
315280304Sjkim            RC4_LOOP(indata, outdata, 2);
316280304Sjkim            if (--i == 0)
317280304Sjkim                break;
318280304Sjkim            RC4_LOOP(indata, outdata, 3);
319280304Sjkim            if (--i == 0)
320280304Sjkim                break;
321280304Sjkim            RC4_LOOP(indata, outdata, 4);
322280304Sjkim            if (--i == 0)
323280304Sjkim                break;
324280304Sjkim            RC4_LOOP(indata, outdata, 5);
325280304Sjkim            if (--i == 0)
326280304Sjkim                break;
327280304Sjkim            RC4_LOOP(indata, outdata, 6);
328280304Sjkim            if (--i == 0)
329280304Sjkim                break;
330280304Sjkim        }
331280304Sjkim    }
332280304Sjkim    key->x = x;
333280304Sjkim    key->y = y;
334280304Sjkim}
335