1/*      $OpenBSD: cast.c,v 1.4 2012/04/25 04:12:27 matthew Exp $       */
2
3/*
4 *	CAST-128 in C
5 *	Written by Steve Reid <sreid@sea-to-sky.net>
6 *	100% Public Domain - no warranty
7 *	Released 1997.10.11
8 */
9
10#include <sys/types.h>
11#include <sys/systm.h>
12#include <crypto/cast.h>
13#include <crypto/castsb.h>
14
/*
 * Macros to access 8-bit bytes out of a 32-bit word.
 * "a" is the most significant byte, "d" the least significant.
 * The argument is parenthesized so that expression arguments (for
 * example `a | b') are shifted as a whole.
 */
#define U_INT8_Ta(x) ( (u_int8_t) ((x)>>24) )
#define U_INT8_Tb(x) ( (u_int8_t) (((x)>>16)&255) )
#define U_INT8_Tc(x) ( (u_int8_t) (((x)>>8)&255) )
#define U_INT8_Td(x) ( (u_int8_t) ((x)&255) )

/*
 * Circular left shift of a 32-bit value.
 * Both shift counts are reduced modulo 32: a rotate count of 0 would
 * otherwise expand to a right shift by 32, which is undefined for a
 * 32-bit operand.  Counts 1..31 behave exactly as before.
 */
#define ROL(x, n) ( ((x)<<((n)&31)) | ((x)>>((32-(n))&31)) )
23
/*
 * CAST-128 uses three different round functions.
 *
 * Each Fn(l, r, i) combines r with subkey xkey[i] (add for F1, xor for F2,
 * subtract for F3), rotates the result left by xkey[i+16], and XORs an
 * S-box combination of the four resulting bytes into l.
 *
 * The macros use a `key' pointer (with an xkey[] array) and a 32-bit
 * scratch variable `t' from the calling scope.  Each one is wrapped in
 * do { } while (0) so an invocation is a single statement, and the
 * arguments are parenthesized so expression arguments expand safely.
 */
#define F1(l, r, i) do { \
	t = ROL(key->xkey[(i)] + (r), key->xkey[(i)+16]); \
	(l) ^= ((cast_sbox1[U_INT8_Ta(t)] ^ cast_sbox2[U_INT8_Tb(t)]) - \
	 cast_sbox3[U_INT8_Tc(t)]) + cast_sbox4[U_INT8_Td(t)]; \
} while (0)
#define F2(l, r, i) do { \
	t = ROL(key->xkey[(i)] ^ (r), key->xkey[(i)+16]); \
	(l) ^= ((cast_sbox1[U_INT8_Ta(t)] - cast_sbox2[U_INT8_Tb(t)]) + \
	 cast_sbox3[U_INT8_Tc(t)]) ^ cast_sbox4[U_INT8_Td(t)]; \
} while (0)
#define F3(l, r, i) do { \
	t = ROL(key->xkey[(i)] - (r), key->xkey[(i)+16]); \
	(l) ^= ((cast_sbox1[U_INT8_Ta(t)] + cast_sbox2[U_INT8_Tb(t)]) ^ \
	 cast_sbox3[U_INT8_Tc(t)]) - cast_sbox4[U_INT8_Td(t)]; \
} while (0)
37
38
39/***** Encryption Function *****/
40
41void
42cast_encrypt(cast_key *key, u_int8_t *inblock, u_int8_t *outblock)
43{
44	u_int32_t t, l, r;
45
46	/* Get inblock into l,r */
47	l = ((u_int32_t)inblock[0] << 24) | ((u_int32_t)inblock[1] << 16) |
48	    ((u_int32_t)inblock[2] << 8) | (u_int32_t)inblock[3];
49	r = ((u_int32_t)inblock[4] << 24) | ((u_int32_t)inblock[5] << 16) |
50	    ((u_int32_t)inblock[6] << 8) | (u_int32_t)inblock[7];
51	/* Do the work */
52	F1(l, r,  0);
53	F2(r, l,  1);
54	F3(l, r,  2);
55	F1(r, l,  3);
56	F2(l, r,  4);
57	F3(r, l,  5);
58	F1(l, r,  6);
59	F2(r, l,  7);
60	F3(l, r,  8);
61	F1(r, l,  9);
62	F2(l, r, 10);
63	F3(r, l, 11);
64	/* Only do full 16 rounds if key length > 80 bits */
65	if (key->rounds > 12) {
66		F1(l, r, 12);
67		F2(r, l, 13);
68		F3(l, r, 14);
69		F1(r, l, 15);
70	}
71	/* Put l,r into outblock */
72	outblock[0] = U_INT8_Ta(r);
73	outblock[1] = U_INT8_Tb(r);
74	outblock[2] = U_INT8_Tc(r);
75	outblock[3] = U_INT8_Td(r);
76	outblock[4] = U_INT8_Ta(l);
77	outblock[5] = U_INT8_Tb(l);
78	outblock[6] = U_INT8_Tc(l);
79	outblock[7] = U_INT8_Td(l);
80	/* Wipe clean */
81	t = l = r = 0;
82}
83
84
85/***** Decryption Function *****/
86
87void
88cast_decrypt(cast_key *key, u_int8_t *inblock, u_int8_t *outblock)
89{
90	u_int32_t t, l, r;
91
92	/* Get inblock into l,r */
93	r = ((u_int32_t)inblock[0] << 24) | ((u_int32_t)inblock[1] << 16) |
94	    ((u_int32_t)inblock[2] << 8) | (u_int32_t)inblock[3];
95	l = ((u_int32_t)inblock[4] << 24) | ((u_int32_t)inblock[5] << 16) |
96	    ((u_int32_t)inblock[6] << 8) | (u_int32_t)inblock[7];
97	/* Do the work */
98	/* Only do full 16 rounds if key length > 80 bits */
99	if (key->rounds > 12) {
100		F1(r, l, 15);
101		F3(l, r, 14);
102		F2(r, l, 13);
103		F1(l, r, 12);
104	}
105	F3(r, l, 11);
106	F2(l, r, 10);
107	F1(r, l,  9);
108	F3(l, r,  8);
109	F2(r, l,  7);
110	F1(l, r,  6);
111	F3(r, l,  5);
112	F2(l, r,  4);
113	F1(r, l,  3);
114	F3(l, r,  2);
115	F2(r, l,  1);
116	F1(l, r,  0);
117	/* Put l,r into outblock */
118	outblock[0] = U_INT8_Ta(l);
119	outblock[1] = U_INT8_Tb(l);
120	outblock[2] = U_INT8_Tc(l);
121	outblock[3] = U_INT8_Td(l);
122	outblock[4] = U_INT8_Ta(r);
123	outblock[5] = U_INT8_Tb(r);
124	outblock[6] = U_INT8_Tc(r);
125	outblock[7] = U_INT8_Td(r);
126	/* Wipe clean */
127	t = l = r = 0;
128}
129
130
131/***** Key Schedule *****/
132
/*
 * Expand a raw key into the subkey schedule stored in *key.
 *
 * key      - receives the round count (key->rounds) and 32 subkeys
 *            (key->xkey[0..31]).
 * rawkey   - key bytes; at most the first 16 are read, and positions at
 *            or beyond keybytes are treated as zero.
 * keybytes - number of valid bytes in rawkey.
 *
 * Keys of 10 bytes or fewer select 12 rounds; longer keys select 16.
 * xkey[0..15] keep full 32-bit values; xkey[16..31] are reduced to their
 * low 5 bits (the round macros use xkey[i+16] as a rotate count).
 */
void
cast_setkey(cast_key *key, u_int8_t *rawkey, int keybytes)
{
	/*
	 * x and z are the two alternating 128-bit working states (four
	 * 32-bit words each); t holds a copy of whichever was just updated.
	 */
	u_int32_t t[4], z[4], x[4];
	int i;

	/* Set number of rounds to 12 or 16, depending on key length */
	key->rounds = (keybytes <= 10 ? 12 : 16);

	/* Copy key to workspace x (big-endian words, zero-padded) */
	for (i = 0; i < 4; i++) {
		x[i] = 0;
		if ((i*4+0) < keybytes) x[i] = (u_int32_t)rawkey[i*4+0] << 24;
		if ((i*4+1) < keybytes) x[i] |= (u_int32_t)rawkey[i*4+1] << 16;
		if ((i*4+2) < keybytes) x[i] |= (u_int32_t)rawkey[i*4+2] << 8;
		if ((i*4+3) < keybytes) x[i] |= (u_int32_t)rawkey[i*4+3];
	}
	/* Generate 32 subkeys, four at a time */
	for (i = 0; i < 32; i+=4) {
		/*
		 * Step 1: update the working state.  (i & 4) alternates on
		 * every pass: case 0 derives z from x, case 4 derives a new
		 * x from z.  Each word depends on the one computed just
		 * before it, so the assignment order matters.
		 */
		switch (i & 4) {
		case 0:
			t[0] = z[0] = x[0] ^ cast_sbox5[U_INT8_Tb(x[3])] ^
			    cast_sbox6[U_INT8_Td(x[3])] ^
			    cast_sbox7[U_INT8_Ta(x[3])] ^
			    cast_sbox8[U_INT8_Tc(x[3])] ^
			    cast_sbox7[U_INT8_Ta(x[2])];
			t[1] = z[1] = x[2] ^ cast_sbox5[U_INT8_Ta(z[0])] ^
			    cast_sbox6[U_INT8_Tc(z[0])] ^
			    cast_sbox7[U_INT8_Tb(z[0])] ^
			    cast_sbox8[U_INT8_Td(z[0])] ^
			    cast_sbox8[U_INT8_Tc(x[2])];
			t[2] = z[2] = x[3] ^ cast_sbox5[U_INT8_Td(z[1])] ^
			    cast_sbox6[U_INT8_Tc(z[1])] ^
			    cast_sbox7[U_INT8_Tb(z[1])] ^
			    cast_sbox8[U_INT8_Ta(z[1])] ^
			    cast_sbox5[U_INT8_Tb(x[2])];
			t[3] = z[3] = x[1] ^ cast_sbox5[U_INT8_Tc(z[2])] ^
			    cast_sbox6[U_INT8_Tb(z[2])] ^
			    cast_sbox7[U_INT8_Td(z[2])] ^
			    cast_sbox8[U_INT8_Ta(z[2])] ^
			    cast_sbox6[U_INT8_Td(x[2])];
			break;
		 case 4:
			t[0] = x[0] = z[2] ^ cast_sbox5[U_INT8_Tb(z[1])] ^
			    cast_sbox6[U_INT8_Td(z[1])] ^
			    cast_sbox7[U_INT8_Ta(z[1])] ^
			    cast_sbox8[U_INT8_Tc(z[1])] ^
			    cast_sbox7[U_INT8_Ta(z[0])];
			t[1] = x[1] = z[0] ^ cast_sbox5[U_INT8_Ta(x[0])] ^
			    cast_sbox6[U_INT8_Tc(x[0])] ^
			    cast_sbox7[U_INT8_Tb(x[0])] ^
			    cast_sbox8[U_INT8_Td(x[0])] ^
			    cast_sbox8[U_INT8_Tc(z[0])];
			t[2] = x[2] = z[1] ^ cast_sbox5[U_INT8_Td(x[1])] ^
			    cast_sbox6[U_INT8_Tc(x[1])] ^
			    cast_sbox7[U_INT8_Tb(x[1])] ^
			    cast_sbox8[U_INT8_Ta(x[1])] ^
			    cast_sbox5[U_INT8_Tb(z[0])];
			t[3] = x[3] = z[3] ^ cast_sbox5[U_INT8_Tc(x[2])] ^
			    cast_sbox6[U_INT8_Tb(x[2])] ^
			    cast_sbox7[U_INT8_Td(x[2])] ^
			    cast_sbox8[U_INT8_Ta(x[2])] ^
			    cast_sbox6[U_INT8_Td(z[0])];
			break;
		}
		/*
		 * Step 2: build four subkeys from the bytes of t.  (i & 12)
		 * cycles through 0, 4, 8, 12; positions 0 and 12 share one
		 * byte-selection pattern, positions 4 and 8 the other.
		 */
		switch (i & 12) {
		case 0:
		case 12:
			key->xkey[i+0] = cast_sbox5[U_INT8_Ta(t[2])] ^
			    cast_sbox6[U_INT8_Tb(t[2])] ^
			    cast_sbox7[U_INT8_Td(t[1])] ^
			    cast_sbox8[U_INT8_Tc(t[1])];
			key->xkey[i+1] = cast_sbox5[U_INT8_Tc(t[2])] ^
			    cast_sbox6[U_INT8_Td(t[2])] ^
			    cast_sbox7[U_INT8_Tb(t[1])] ^
			    cast_sbox8[U_INT8_Ta(t[1])];
			key->xkey[i+2] = cast_sbox5[U_INT8_Ta(t[3])] ^
			    cast_sbox6[U_INT8_Tb(t[3])] ^
			    cast_sbox7[U_INT8_Td(t[0])] ^
			    cast_sbox8[U_INT8_Tc(t[0])];
			key->xkey[i+3] = cast_sbox5[U_INT8_Tc(t[3])] ^
			    cast_sbox6[U_INT8_Td(t[3])] ^
			    cast_sbox7[U_INT8_Tb(t[0])] ^
			    cast_sbox8[U_INT8_Ta(t[0])];
			break;
		case 4:
		case 8:
			key->xkey[i+0] = cast_sbox5[U_INT8_Td(t[0])] ^
			    cast_sbox6[U_INT8_Tc(t[0])] ^
			    cast_sbox7[U_INT8_Ta(t[3])] ^
			    cast_sbox8[U_INT8_Tb(t[3])];
			key->xkey[i+1] = cast_sbox5[U_INT8_Tb(t[0])] ^
			    cast_sbox6[U_INT8_Ta(t[0])] ^
			    cast_sbox7[U_INT8_Tc(t[3])] ^
			    cast_sbox8[U_INT8_Td(t[3])];
			key->xkey[i+2] = cast_sbox5[U_INT8_Td(t[1])] ^
			    cast_sbox6[U_INT8_Tc(t[1])] ^
			    cast_sbox7[U_INT8_Ta(t[2])] ^
			    cast_sbox8[U_INT8_Tb(t[2])];
			key->xkey[i+3] = cast_sbox5[U_INT8_Tb(t[1])] ^
			    cast_sbox6[U_INT8_Ta(t[1])] ^
			    cast_sbox7[U_INT8_Tc(t[2])] ^
			    cast_sbox8[U_INT8_Td(t[2])];
			break;
		}
		/*
		 * Step 3: XOR one more S-box term into each of the four
		 * subkeys.  The source word (z or x) and the byte taken from
		 * it differ for each of the four positions in the cycle.
		 */
		switch (i & 12) {
		case 0:
			key->xkey[i+0] ^= cast_sbox5[U_INT8_Tc(z[0])];
			key->xkey[i+1] ^= cast_sbox6[U_INT8_Tc(z[1])];
			key->xkey[i+2] ^= cast_sbox7[U_INT8_Tb(z[2])];
			key->xkey[i+3] ^= cast_sbox8[U_INT8_Ta(z[3])];
			break;
		case 4:
			key->xkey[i+0] ^= cast_sbox5[U_INT8_Ta(x[2])];
			key->xkey[i+1] ^= cast_sbox6[U_INT8_Tb(x[3])];
			key->xkey[i+2] ^= cast_sbox7[U_INT8_Td(x[0])];
			key->xkey[i+3] ^= cast_sbox8[U_INT8_Td(x[1])];
			break;
		case 8:
			key->xkey[i+0] ^= cast_sbox5[U_INT8_Tb(z[2])];
			key->xkey[i+1] ^= cast_sbox6[U_INT8_Ta(z[3])];
			key->xkey[i+2] ^= cast_sbox7[U_INT8_Tc(z[0])];
			key->xkey[i+3] ^= cast_sbox8[U_INT8_Tc(z[1])];
			break;
		case 12:
			key->xkey[i+0] ^= cast_sbox5[U_INT8_Td(x[0])];
			key->xkey[i+1] ^= cast_sbox6[U_INT8_Td(x[1])];
			key->xkey[i+2] ^= cast_sbox7[U_INT8_Ta(x[2])];
			key->xkey[i+3] ^= cast_sbox8[U_INT8_Tb(x[3])];
			break;
		}
		/*
		 * Subkeys 16..31 are used as rotate counts by the round
		 * macros, so keep only their low 5 bits (0..31).
		 */
		if (i >= 16) {
			key->xkey[i+0] &= 31;
			key->xkey[i+1] &= 31;
			key->xkey[i+2] &= 31;
			key->xkey[i+3] &= 31;
		}
	}
	/* Wipe clean: scrub key-derived temporaries from the stack */
	explicit_bzero(t, sizeof(t));
	explicit_bzero(x, sizeof(x));
	explicit_bzero(z, sizeof(z));
}
276
277/* Made in Canada */
278