155714Skris/* crypto/rc4/rc4_enc.c */ 255714Skris/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 355714Skris * All rights reserved. 455714Skris * 555714Skris * This package is an SSL implementation written 655714Skris * by Eric Young (eay@cryptsoft.com). 755714Skris * The implementation was written so as to conform with Netscapes SSL. 8280304Sjkim * 955714Skris * This library is free for commercial and non-commercial use as long as 1055714Skris * the following conditions are aheared to. The following conditions 1155714Skris * apply to all code found in this distribution, be it the RC4, RSA, 1255714Skris * lhash, DES, etc., code; not just the SSL code. The SSL documentation 1355714Skris * included with this distribution is covered by the same copyright terms 1455714Skris * except that the holder is Tim Hudson (tjh@cryptsoft.com). 15280304Sjkim * 1655714Skris * Copyright remains Eric Young's, and as such any Copyright notices in 1755714Skris * the code are not to be removed. 1855714Skris * If this package is used in a product, Eric Young should be given attribution 1955714Skris * as the author of the parts of the library used. 2055714Skris * This can be in the form of a textual message at program startup or 2155714Skris * in documentation (online or textual) provided with the package. 22280304Sjkim * 2355714Skris * Redistribution and use in source and binary forms, with or without 2455714Skris * modification, are permitted provided that the following conditions 2555714Skris * are met: 2655714Skris * 1. Redistributions of source code must retain the copyright 2755714Skris * notice, this list of conditions and the following disclaimer. 2855714Skris * 2. Redistributions in binary form must reproduce the above copyright 2955714Skris * notice, this list of conditions and the following disclaimer in the 3055714Skris * documentation and/or other materials provided with the distribution. 3155714Skris * 3. All advertising materials mentioning features or use of this software 3255714Skris * must display the following acknowledgement: 3355714Skris * "This product includes cryptographic software written by 3455714Skris * Eric Young (eay@cryptsoft.com)" 3555714Skris * The word 'cryptographic' can be left out if the rouines from the library 3655714Skris * being used are not cryptographic related :-). 37280304Sjkim * 4. If you include any Windows specific code (or a derivative thereof) from 3855714Skris * the apps directory (application code) you must include an acknowledgement: 3955714Skris * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" 40280304Sjkim * 4155714Skris * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 4255714Skris * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 4355714Skris * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 4455714Skris * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 4555714Skris * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 4655714Skris * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 4755714Skris * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 4855714Skris * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 4955714Skris * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 5055714Skris * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 5155714Skris * SUCH DAMAGE. 52280304Sjkim * 5355714Skris * The licence and distribution terms for any publically available version or 5455714Skris * derivative of this code cannot be changed. i.e. this code cannot simply be 5555714Skris * copied and put under another distribution licence 5655714Skris * [including the GNU Public Licence.] 5755714Skris */ 5855714Skris 5955714Skris#include <openssl/rc4.h> 6055714Skris#include "rc4_locl.h" 6155714Skris 62280304Sjkim/*- 63280304Sjkim * RC4 as implemented from a posting from 6455714Skris * Newsgroups: sci.crypt 6555714Skris * From: sterndark@netcom.com (David Sterndark) 6655714Skris * Subject: RC4 Algorithm revealed. 6755714Skris * Message-ID: <sternCvKL4B.Hyy@netcom.com> 6855714Skris * Date: Wed, 14 Sep 1994 06:35:31 GMT 6955714Skris */ 7055714Skris 71238405Sjkimvoid RC4(RC4_KEY *key, size_t len, const unsigned char *indata, 72280304Sjkim unsigned char *outdata) 73280304Sjkim{ 74280304Sjkim register RC4_INT *d; 75280304Sjkim register RC4_INT x, y, tx, ty; 76280304Sjkim size_t i; 7755714Skris 78280304Sjkim x = key->x; 79280304Sjkim y = key->y; 80280304Sjkim d = key->data; 81280304Sjkim 8259191Skris#if defined(RC4_CHUNK) 83280304Sjkim /*- 84280304Sjkim * The original reason for implementing this(*) was the fact that 85280304Sjkim * pre-21164a Alpha CPUs don't have byte load/store instructions 86280304Sjkim * and e.g. a byte store has to be done with 64-bit load, shift, 87280304Sjkim * and, or and finally 64-bit store. Peaking data and operating 88280304Sjkim * at natural word size made it possible to reduce amount of 89280304Sjkim * instructions as well as to perform early read-ahead without 90280304Sjkim * suffering from RAW (read-after-write) hazard. This resulted 91280304Sjkim * in ~40%(**) performance improvement on 21064 box with gcc. 92280304Sjkim * But it's not only Alpha users who win here:-) Thanks to the 93280304Sjkim * early-n-wide read-ahead this implementation also exhibits 94280304Sjkim * >40% speed-up on SPARC and 20-30% on 64-bit MIPS (depending 95280304Sjkim * on sizeof(RC4_INT)). 96280304Sjkim * 97280304Sjkim * (*) "this" means code which recognizes the case when input 98280304Sjkim * and output pointers appear to be aligned at natural CPU 99280304Sjkim * word boundary 100280304Sjkim * (**) i.e. according to 'apps/openssl speed rc4' benchmark, 101280304Sjkim * crypto/rc4/rc4speed.c exhibits almost 70% speed-up... 102280304Sjkim * 103280304Sjkim * Cavets. 104280304Sjkim * 105280304Sjkim * - RC4_CHUNK="unsigned long long" should be a #1 choice for 106280304Sjkim * UltraSPARC. Unfortunately gcc generates very slow code 107280304Sjkim * (2.5-3 times slower than one generated by Sun's WorkShop 108280304Sjkim * C) and therefore gcc (at least 2.95 and earlier) should 109280304Sjkim * always be told that RC4_CHUNK="unsigned long". 110280304Sjkim * 111280304Sjkim * <appro@fy.chalmers.se> 112280304Sjkim */ 11359191Skris 114280304Sjkim# define RC4_STEP ( \ 115280304Sjkim x=(x+1) &0xff, \ 116280304Sjkim tx=d[x], \ 117280304Sjkim y=(tx+y)&0xff, \ 118280304Sjkim ty=d[y], \ 119280304Sjkim d[y]=tx, \ 120280304Sjkim d[x]=ty, \ 121280304Sjkim (RC4_CHUNK)d[(tx+ty)&0xff]\ 122280304Sjkim ) 12359191Skris 124280304Sjkim if ((((size_t)indata & (sizeof(RC4_CHUNK) - 1)) | 125280304Sjkim ((size_t)outdata & (sizeof(RC4_CHUNK) - 1))) == 0) { 126280304Sjkim RC4_CHUNK ichunk, otp; 127280304Sjkim const union { 128280304Sjkim long one; 129280304Sjkim char little; 130280304Sjkim } is_endian = { 131280304Sjkim 1 132280304Sjkim }; 13359191Skris 134280304Sjkim /*- 135280304Sjkim * I reckon we can afford to implement both endian 136280304Sjkim * cases and to decide which way to take at run-time 137280304Sjkim * because the machine code appears to be very compact 138280304Sjkim * and redundant 1-2KB is perfectly tolerable (i.e. 139280304Sjkim * in case the compiler fails to eliminate it:-). By 140280304Sjkim * suggestion from Terrel Larson <terr@terralogic.net> 141280304Sjkim * who also stands for the is_endian union:-) 142280304Sjkim * 143280304Sjkim * Special notes. 144280304Sjkim * 145280304Sjkim * - is_endian is declared automatic as doing otherwise 146280304Sjkim * (declaring static) prevents gcc from eliminating 147280304Sjkim * the redundant code; 148280304Sjkim * - compilers (those I've tried) don't seem to have 149280304Sjkim * problems eliminating either the operators guarded 150280304Sjkim * by "if (sizeof(RC4_CHUNK)==8)" or the condition 151280304Sjkim * expressions themselves so I've got 'em to replace 152280304Sjkim * corresponding #ifdefs from the previous version; 153280304Sjkim * - I chose to let the redundant switch cases when 154280304Sjkim * sizeof(RC4_CHUNK)!=8 be (were also #ifdefed 155280304Sjkim * before); 156280304Sjkim * - in case you wonder "&(sizeof(RC4_CHUNK)*8-1)" in 157280304Sjkim * [LB]ESHFT guards against "shift is out of range" 158280304Sjkim * warnings when sizeof(RC4_CHUNK)!=8 159280304Sjkim * 160280304Sjkim * <appro@fy.chalmers.se> 161280304Sjkim */ 162280304Sjkim if (!is_endian.little) { /* BIG-ENDIAN CASE */ 163280304Sjkim# define BESHFT(c) (((sizeof(RC4_CHUNK)-(c)-1)*8)&(sizeof(RC4_CHUNK)*8-1)) 164280304Sjkim for (; len & (0 - sizeof(RC4_CHUNK)); len -= sizeof(RC4_CHUNK)) { 165280304Sjkim ichunk = *(RC4_CHUNK *) indata; 166280304Sjkim otp = RC4_STEP << BESHFT(0); 167280304Sjkim otp |= RC4_STEP << BESHFT(1); 168280304Sjkim otp |= RC4_STEP << BESHFT(2); 169280304Sjkim otp |= RC4_STEP << BESHFT(3); 170280304Sjkim if (sizeof(RC4_CHUNK) == 8) { 171280304Sjkim otp |= RC4_STEP << BESHFT(4); 172280304Sjkim otp |= RC4_STEP << BESHFT(5); 173280304Sjkim otp |= RC4_STEP << BESHFT(6); 174280304Sjkim otp |= RC4_STEP << BESHFT(7); 175280304Sjkim } 176280304Sjkim *(RC4_CHUNK *) outdata = otp ^ ichunk; 177280304Sjkim indata += sizeof(RC4_CHUNK); 178280304Sjkim outdata += sizeof(RC4_CHUNK); 179280304Sjkim } 180280304Sjkim if (len) { 181280304Sjkim RC4_CHUNK mask = (RC4_CHUNK) - 1, ochunk; 18259191Skris 183280304Sjkim ichunk = *(RC4_CHUNK *) indata; 184280304Sjkim ochunk = *(RC4_CHUNK *) outdata; 185280304Sjkim otp = 0; 186280304Sjkim i = BESHFT(0); 187280304Sjkim mask <<= (sizeof(RC4_CHUNK) - len) << 3; 188280304Sjkim switch (len & (sizeof(RC4_CHUNK) - 1)) { 189280304Sjkim case 7: 190280304Sjkim otp = RC4_STEP << i, i -= 8; 191280304Sjkim case 6: 192280304Sjkim otp |= RC4_STEP << i, i -= 8; 193280304Sjkim case 5: 194280304Sjkim otp |= RC4_STEP << i, i -= 8; 195280304Sjkim case 4: 196280304Sjkim otp |= RC4_STEP << i, i -= 8; 197280304Sjkim case 3: 198280304Sjkim otp |= RC4_STEP << i, i -= 8; 199280304Sjkim case 2: 200280304Sjkim otp |= RC4_STEP << i, i -= 8; 201280304Sjkim case 1: 202280304Sjkim otp |= RC4_STEP << i, i -= 8; 203280304Sjkim case 0:; /* 204280304Sjkim * it's never the case, 205280304Sjkim * but it has to be here 206280304Sjkim * for ultrix? 207280304Sjkim */ 208280304Sjkim } 209280304Sjkim ochunk &= ~mask; 210280304Sjkim ochunk |= (otp ^ ichunk) & mask; 211280304Sjkim *(RC4_CHUNK *) outdata = ochunk; 212280304Sjkim } 213280304Sjkim key->x = x; 214280304Sjkim key->y = y; 215280304Sjkim return; 216280304Sjkim } else { /* LITTLE-ENDIAN CASE */ 217280304Sjkim# define LESHFT(c) (((c)*8)&(sizeof(RC4_CHUNK)*8-1)) 218280304Sjkim for (; len & (0 - sizeof(RC4_CHUNK)); len -= sizeof(RC4_CHUNK)) { 219280304Sjkim ichunk = *(RC4_CHUNK *) indata; 220280304Sjkim otp = RC4_STEP; 221280304Sjkim otp |= RC4_STEP << 8; 222280304Sjkim otp |= RC4_STEP << 16; 223280304Sjkim otp |= RC4_STEP << 24; 224280304Sjkim if (sizeof(RC4_CHUNK) == 8) { 225280304Sjkim otp |= RC4_STEP << LESHFT(4); 226280304Sjkim otp |= RC4_STEP << LESHFT(5); 227280304Sjkim otp |= RC4_STEP << LESHFT(6); 228280304Sjkim otp |= RC4_STEP << LESHFT(7); 229280304Sjkim } 230280304Sjkim *(RC4_CHUNK *) outdata = otp ^ ichunk; 231280304Sjkim indata += sizeof(RC4_CHUNK); 232280304Sjkim outdata += sizeof(RC4_CHUNK); 233280304Sjkim } 234280304Sjkim if (len) { 235280304Sjkim RC4_CHUNK mask = (RC4_CHUNK) - 1, ochunk; 23659191Skris 237280304Sjkim ichunk = *(RC4_CHUNK *) indata; 238280304Sjkim ochunk = *(RC4_CHUNK *) outdata; 239280304Sjkim otp = 0; 240280304Sjkim i = 0; 241280304Sjkim mask >>= (sizeof(RC4_CHUNK) - len) << 3; 242280304Sjkim switch (len & (sizeof(RC4_CHUNK) - 1)) { 243280304Sjkim case 7: 244280304Sjkim otp = RC4_STEP, i += 8; 245280304Sjkim case 6: 246280304Sjkim otp |= RC4_STEP << i, i += 8; 247280304Sjkim case 5: 248280304Sjkim otp |= RC4_STEP << i, i += 8; 249280304Sjkim case 4: 250280304Sjkim otp |= RC4_STEP << i, i += 8; 251280304Sjkim case 3: 252280304Sjkim otp |= RC4_STEP << i, i += 8; 253280304Sjkim case 2: 254280304Sjkim otp |= RC4_STEP << i, i += 8; 255280304Sjkim case 1: 256280304Sjkim otp |= RC4_STEP << i, i += 8; 257280304Sjkim case 0:; /* 258280304Sjkim * it's never the case, 259280304Sjkim * but it has to be here 260280304Sjkim * for ultrix? 261280304Sjkim */ 262280304Sjkim } 263280304Sjkim ochunk &= ~mask; 264280304Sjkim ochunk |= (otp ^ ichunk) & mask; 265280304Sjkim *(RC4_CHUNK *) outdata = ochunk; 266280304Sjkim } 267280304Sjkim key->x = x; 268280304Sjkim key->y = y; 269280304Sjkim return; 270280304Sjkim } 271280304Sjkim } 27259191Skris#endif 27355714Skris#define LOOP(in,out) \ 274280304Sjkim x=((x+1)&0xff); \ 275280304Sjkim tx=d[x]; \ 276280304Sjkim y=(tx+y)&0xff; \ 277280304Sjkim d[x]=ty=d[y]; \ 278280304Sjkim d[y]=tx; \ 279280304Sjkim (out) = d[(tx+ty)&0xff]^ (in); 28055714Skris 28155714Skris#ifndef RC4_INDEX 282280304Sjkim# define RC4_LOOP(a,b,i) LOOP(*((a)++),*((b)++)) 28355714Skris#else 284280304Sjkim# define RC4_LOOP(a,b,i) LOOP(a[i],b[i]) 28555714Skris#endif 28655714Skris 287280304Sjkim i = len >> 3; 288280304Sjkim if (i) { 289280304Sjkim for (;;) { 290280304Sjkim RC4_LOOP(indata, outdata, 0); 291280304Sjkim RC4_LOOP(indata, outdata, 1); 292280304Sjkim RC4_LOOP(indata, outdata, 2); 293280304Sjkim RC4_LOOP(indata, outdata, 3); 294280304Sjkim RC4_LOOP(indata, outdata, 4); 295280304Sjkim RC4_LOOP(indata, outdata, 5); 296280304Sjkim RC4_LOOP(indata, outdata, 6); 297280304Sjkim RC4_LOOP(indata, outdata, 7); 29855714Skris#ifdef RC4_INDEX 299280304Sjkim indata += 8; 300280304Sjkim outdata += 8; 30155714Skris#endif 302280304Sjkim if (--i == 0) 303280304Sjkim break; 304280304Sjkim } 305280304Sjkim } 306280304Sjkim i = len & 0x07; 307280304Sjkim if (i) { 308280304Sjkim for (;;) { 309280304Sjkim RC4_LOOP(indata, outdata, 0); 310280304Sjkim if (--i == 0) 311280304Sjkim break; 312280304Sjkim RC4_LOOP(indata, outdata, 1); 313280304Sjkim if (--i == 0) 314280304Sjkim break; 315280304Sjkim RC4_LOOP(indata, outdata, 2); 316280304Sjkim if (--i == 0) 317280304Sjkim break; 318280304Sjkim RC4_LOOP(indata, outdata, 3); 319280304Sjkim if (--i == 0) 320280304Sjkim break; 321280304Sjkim RC4_LOOP(indata, outdata, 4); 322280304Sjkim if (--i == 0) 323280304Sjkim break; 324280304Sjkim RC4_LOOP(indata, outdata, 5); 325280304Sjkim if (--i == 0) 326280304Sjkim break; 327280304Sjkim RC4_LOOP(indata, outdata, 6); 328280304Sjkim if (--i == 0) 329280304Sjkim break; 330280304Sjkim } 331280304Sjkim } 332280304Sjkim key->x = x; 333280304Sjkim key->y = y; 334280304Sjkim} 335